diff --git a/.gitignore b/.gitignore
index 8e2474c..34d6b03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 *.redb
 .claude/
 traces/
+worktrees/
diff --git a/Cargo.lock b/Cargo.lock
index f76186f..e3795da 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -88,31 +88,6 @@ version = "1.0.101"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea"
 
-[[package]]
-name = "async-openai"
-version = "0.27.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66"
-dependencies = [
- "backoff",
- "base64",
- "bytes",
- "derive_builder",
- "eventsource-stream",
- "futures",
- "rand 0.8.5",
- "reqwest",
- "reqwest-eventsource",
- "secrecy",
- "serde",
- "serde_json",
- "thiserror 2.0.18",
- "tokio",
- "tokio-stream",
- "tokio-util",
- "tracing",
-]
-
 [[package]]
 name = "async-trait"
 version = "0.1.89"
@@ -143,6 +118,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
 dependencies = [
  "axum-core",
+ "base64",
  "bytes",
  "form_urlencoded",
  "futures-util",
@@ -161,8 +137,10 @@ dependencies = [
  "serde_json",
  "serde_path_to_error",
  "serde_urlencoded",
+ "sha1",
  "sync_wrapper",
  "tokio",
+ "tokio-tungstenite 0.28.0",
  "tower 0.5.3",
  "tower-layer",
  "tower-service",
@@ -188,20 +166,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "backoff"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
-dependencies = [
- "futures-core",
- "getrandom 0.2.17",
- "instant",
- "pin-project-lite",
- "rand 0.8.5",
- "tokio",
-]
-
 [[package]]
 name = "base64"
 version = "0.22.1"
@@ -235,6 +199,15 @@ version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "bollard"
 version = "0.18.1"
@@ -260,7 +233,7 @@ dependencies = [
  "serde_json",
  "serde_repr",
  "serde_urlencoded",
- "thiserror 2.0.18",
+ "thiserror",
  "tokio",
  "tokio-util",
  "tower-service",
@@ -340,12 +313,6 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
-[[package]]
-name = "cfg_aliases"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
-
 [[package]]
 name = "chrono"
 version = "0.4.43"
@@ -457,22 +424,21 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "core-foundation"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
-dependencies = [
- "core-foundation-sys",
- "libc",
-]
-
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "criterion"
 version = "0.5.1"
@@ -566,13 +532,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
 
 [[package]]
-name = "darling"
-version = "0.20.11"
+name = "crypto-common"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
 dependencies = [
- "darling_core 0.20.11",
- "darling_macro 0.20.11",
+ "generic-array",
+ "typenum",
 ]
 
 [[package]]
@@ -581,22 +547,8 @@ version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
 dependencies = [
- "darling_core 0.23.0",
- "darling_macro 0.23.0",
-]
-
-[[package]]
-name = "darling_core"
-version = "0.20.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
-dependencies = [
- "fnv",
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim",
- "syn",
+ "darling_core",
+ "darling_macro",
 ]
 
 [[package]]
@@ -614,25 +566,20 @@ dependencies = [
 
 [[package]]
 name = "darling_macro"
-version = "0.20.11"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
 dependencies = [
- "darling_core 0.20.11",
+ "darling_core",
  "quote",
  "syn",
 ]
 
 [[package]]
-name = "darling_macro"
-version = "0.23.0"
+name = "data-encoding"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
-dependencies = [
- "darling_core 0.23.0",
- "quote",
- "syn",
-]
+checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
 
 [[package]]
 name = "deranged"
@@ -645,34 +592,13 @@ dependencies = [
 ]
 
 [[package]]
-name = "derive_builder"
-version = "0.20.2"
+name = "digest"
+version = "0.10.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
- "derive_builder_macro",
-]
-
-[[package]]
-name = "derive_builder_core"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
-dependencies = [
- "darling 0.20.11",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "derive_builder_macro"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
-dependencies = [
- "derive_builder_core",
- "syn",
+ "block-buffer",
+ "crypto-common",
 ]
 
 [[package]]
@@ -723,17 +649,6 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "eventsource-stream"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab"
-dependencies = [
- "futures-core",
- "nom",
- "pin-project-lite",
-]
-
 [[package]]
 name = "fastrand"
 version = "2.3.0"
@@ -791,21 +706,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "futures"
-version = "0.3.31"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
-dependencies = [
- "futures-channel",
- "futures-core",
- "futures-executor",
- "futures-io",
- "futures-sink",
- "futures-task",
- "futures-util",
-]
-
 [[package]]
 name = "futures-channel"
 version = "0.3.31"
@@ -862,19 +762,12 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
 
-[[package]]
-name = "futures-timer"
-version = "3.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
-
 [[package]]
 name = "futures-util"
 version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
 dependencies = [
- "futures-channel",
  "futures-core",
  "futures-io",
  "futures-macro",
@@ -901,6 +794,16 @@ dependencies = [
  "windows-result",
 ]
 
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.17"
@@ -908,10 +811,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
 dependencies = [
  "cfg-if",
- "js-sys",
  "libc",
  "wasi",
- "wasm-bindgen",
 ]
 
 [[package]]
@@ -921,11 +822,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
- "js-sys",
  "libc",
  "r-efi",
  "wasip2",
- "wasm-bindgen",
 ]
 
 [[package]]
@@ -938,7 +837,7 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
- "openssl-probe 0.1.6",
+ "openssl-probe",
  "openssl-sys",
  "url",
 ]
@@ -1113,7 +1012,6 @@ dependencies = [
  "hyper",
  "hyper-util",
  "rustls",
- "rustls-native-certs",
  "rustls-pki-types",
  "tokio",
  "tokio-rustls",
@@ -1379,22 +1277,13 @@ version = "0.3.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "357b7205c6cd18dd2c86ed312d1e70add149aea98e7ef72b9fdf0270e555c11d"
 dependencies = [
- "darling 0.23.0",
+ "darling",
  "indoc",
  "proc-macro2",
  "quote",
  "syn",
 ]
 
-[[package]]
-name = "instant"
-version = "0.1.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
-dependencies = [
- "cfg-if",
-]
-
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -1608,12 +1497,6 @@ dependencies = [
  "hashbrown 0.15.5",
 ]
 
-[[package]]
-name = "lru-slab"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
-
 [[package]]
 name = "matchers"
 version = "0.2.0"
@@ -1641,22 +1524,6 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
-[[package]]
-name = "mime_guess"
-version = "2.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
-dependencies = [
- "mime",
- "unicase",
-]
-
-[[package]]
-name = "minimal-lexical"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
-
 [[package]]
 name = "mio"
 version = "1.1.1"
@@ -1678,24 +1545,14 @@ dependencies = [
  "libc",
  "log",
  "openssl",
- "openssl-probe 0.1.6",
+ "openssl-probe",
  "openssl-sys",
  "schannel",
- "security-framework 2.11.1",
+ "security-framework",
  "security-framework-sys",
  "tempfile",
 ]
 
-[[package]]
-name = "nom"
-version = "7.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
-dependencies = [
- "memchr",
- "minimal-lexical",
-]
-
 [[package]]
 name = "notify"
 version = "8.2.0"
@@ -1809,12 +1666,6 @@ version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
 
-[[package]]
-name = "openssl-probe"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
-
 [[package]]
 name = "openssl-sys"
 version = "0.9.111"
@@ -1837,7 +1688,7 @@ dependencies = [
  "futures-sink",
  "js-sys",
  "pin-project-lite",
- "thiserror 2.0.18",
+ "thiserror",
  "tracing",
 ]
 
@@ -1869,7 +1720,7 @@ dependencies = [
  "opentelemetry_sdk",
  "prost",
  "reqwest",
- "thiserror 2.0.18",
+ "thiserror",
  "tokio",
  "tonic",
  "tracing",
@@ -1901,7 +1752,7 @@ dependencies = [
  "percent-encoding",
  "rand 0.9.2",
  "serde_json",
- "thiserror 2.0.18",
+ "thiserror",
  "tokio",
  "tokio-stream",
  "tracing",
@@ -2089,61 +1940,6 @@ version = "1.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
 
-[[package]]
-name = "quinn"
-version = "0.11.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
-dependencies = [
- "bytes",
- "cfg_aliases",
- "pin-project-lite",
- "quinn-proto",
- "quinn-udp",
- "rustc-hash",
- "rustls",
- "socket2",
- "thiserror 2.0.18",
- "tokio",
- "tracing",
- "web-time",
-]
-
-[[package]]
-name = "quinn-proto"
-version = "0.11.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
-dependencies = [
- "bytes",
- "getrandom 0.3.4",
- "lru-slab",
- "rand 0.9.2",
- "ring",
- "rustc-hash",
- "rustls",
- "rustls-pki-types",
- "slab",
- "thiserror 2.0.18",
- "tinyvec",
- "tracing",
- "web-time",
-]
-
-[[package]]
-name = "quinn-udp"
-version = "0.5.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
-dependencies = [
- "cfg_aliases",
- "libc",
- "once_cell",
- "socket2",
- "tracing",
- "windows-sys 0.60.2",
-]
-
 [[package]]
 name = "quote"
 version = "1.0.44"
@@ -2358,13 +2154,9 @@ dependencies = [
  "js-sys",
  "log",
  "mime",
- "mime_guess",
  "native-tls",
  "percent-encoding",
  "pin-project-lite",
- "quinn",
- "rustls",
- "rustls-native-certs",
  "rustls-pki-types",
  "serde",
  "serde_json",
@@ -2372,34 +2164,15 @@ dependencies = [
  "sync_wrapper",
  "tokio",
  "tokio-native-tls",
- "tokio-rustls",
- "tokio-util",
  "tower 0.5.3",
  "tower-http",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
- "wasm-streams",
  "web-sys",
 ]
 
-[[package]]
-name = "reqwest-eventsource"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
-dependencies = [
- "eventsource-stream",
- "futures-core",
- "futures-timer",
- "mime",
- "nom",
- "pin-project-lite",
- "reqwest",
- "thiserror 1.0.69",
-]
-
 [[package]]
 name = "ring"
 version = "0.17.14"
@@ -2414,12 +2187,6 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
-[[package]]
-name = "rustc-hash"
-version = "2.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
-
 [[package]]
 name = "rustix"
 version = "0.38.44"
@@ -2453,32 +2220,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
 dependencies = [
  "once_cell",
- "ring",
  "rustls-pki-types",
  "rustls-webpki",
  "subtle",
  "zeroize",
 ]
 
-[[package]]
-name = "rustls-native-certs"
-version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
-dependencies = [
- "openssl-probe 0.2.1",
- "rustls-pki-types",
- "schannel",
- "security-framework 3.5.1",
-]
-
 [[package]]
 name = "rustls-pki-types"
 version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
 dependencies = [
- "web-time",
  "zeroize",
 ]
 
@@ -2571,16 +2324,6 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
-[[package]]
-name = "secrecy"
-version = "0.10.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
-dependencies = [
- "serde",
- "zeroize",
-]
-
 [[package]]
 name = "security-framework"
 version = "2.11.1"
@@ -2588,20 +2331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
 dependencies = [
  "bitflags 2.10.0",
- "core-foundation 0.9.4",
- "core-foundation-sys",
- "libc",
- "security-framework-sys",
-]
-
-[[package]]
-name = "security-framework"
-version = "3.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef"
-dependencies = [
- "bitflags 2.10.0",
- "core-foundation 0.10.1",
+ "core-foundation",
  "core-foundation-sys",
  "libc",
  "security-framework-sys",
@@ -2721,6 +2451,17 @@ dependencies = [
  "time",
 ]
 
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
 [[package]]
 name = "sharded-slab"
 version = "0.1.7"
@@ -2873,7 +2614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
 dependencies = [
  "bitflags 2.10.0",
- "core-foundation 0.9.4",
+ "core-foundation",
  "system-configuration-sys",
 ]
 
@@ -2900,33 +2641,13 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "thiserror"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
-dependencies = [
- "thiserror-impl 1.0.69",
-]
-
 [[package]]
 name = "thiserror"
 version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
 dependencies = [
- "thiserror-impl 2.0.18",
-]
-
-[[package]]
-name = "thiserror-impl"
-version = "1.0.69"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
+ "thiserror-impl",
 ]
 
 [[package]]
@@ -2967,6 +2688,7 @@ dependencies = [
  "thrum-runner",
  "tokio",
  "tokio-stream",
+ "tokio-tungstenite 0.26.2",
  "tokio-util",
  "toml",
  "tower 0.5.3",
@@ -3011,7 +2733,7 @@ dependencies = [
  "proptest",
  "serde",
  "serde_json",
- "thiserror 2.0.18",
+ "thiserror",
  "toml",
  "tracing",
  "tracing-opentelemetry",
@@ -3030,8 +2752,9 @@ dependencies = [
  "serde",
  "serde_json",
  "tempfile",
- "thiserror 2.0.18",
+ "thiserror",
  "thrum-core",
+ "toml",
  "tracing",
 ]
 
@@ -3040,20 +2763,19 @@ name = "thrum-runner"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "async-openai",
  "async-trait",
  "bollard",
  "chrono",
  "futures-util",
  "git2",
+ "libc",
  "notify",
  "notify-debouncer-mini",
  "redb",
- "reqwest",
  "serde",
  "serde_json",
  "tempfile",
- "thiserror 2.0.18",
+ "thiserror",
  "thrum-core",
  "thrum-db",
  "tokio",
@@ -3113,21 +2835,6 @@ dependencies = [
  "serde_json",
 ]
 
-[[package]]
-name = "tinyvec"
-version = "1.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
-dependencies = [
- "tinyvec_macros",
-]
-
-[[package]]
-name = "tinyvec_macros"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
-
 [[package]]
 name = "tokio"
 version = "1.49.0"
@@ -3188,6 +2895,30 @@ dependencies = [
  "tokio-util",
 ]
 
+[[package]]
+name = "tokio-tungstenite"
+version = "0.26.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a9daff607c6d2bf6c16fd681ccb7eecc83e4e2cdc1ca067ffaadfca5de7f084"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.26.2",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.28.0",
+]
+
 [[package]]
 name = "tokio-util"
 version = "0.7.18"
@@ -3436,16 +3167,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
 
 [[package]]
-name = "unarray"
-version = "0.1.4"
+name = "tungstenite"
+version = "0.26.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13"
+dependencies = [
+ "bytes",
+ "data-encoding",
+ "http",
+ "httparse",
+ "log",
+ "rand 0.9.2",
+ "sha1",
+ "thiserror",
+ "utf-8",
+]
+
+[[package]]
+name = "tungstenite"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442"
+dependencies = [
+ "bytes",
+ "data-encoding",
+ "http",
+ "httparse",
+ "log",
+ "rand 0.9.2",
+ "sha1",
+ "thiserror",
+ "utf-8",
+]
+
+[[package]]
+name = "typenum"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
 
 [[package]]
-name = "unicase"
-version = "2.9.0"
+name = "unarray"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
 
 [[package]]
 name = "unicode-ident"
@@ -3500,6 +3265,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+
 [[package]]
 name = "utf8_iter"
 version = "1.0.4"
@@ -3524,6 +3295,12 @@ version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
 
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
 [[package]]
 name = "wait-timeout"
 version = "0.2.1"
@@ -3626,19 +3403,6 @@ dependencies = [
  "unicode-ident",
 ]
 
-[[package]]
-name = "wasm-streams"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
-dependencies = [
- "futures-util",
- "js-sys",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
-]
-
 [[package]]
 name = "web-sys"
 version = "0.3.85"
diff --git a/Cargo.toml b/Cargo.toml
index bb8b290..546b6eb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,12 +30,8 @@ toml = "0.8"
 # CLI
 clap = { version = "4.5", features = ["derive"] }
 
-# TUI
-ratatui = "0.29"
-crossterm = "0.28"
-
 # HTTP server
-axum = "0.8"
+axum = { version = "0.8", features = ["ws"] }
 tower = "0.5"
 tower-http = { version = "0.6", features = ["cors", "trace"] }
 
@@ -58,6 +54,9 @@ tracing-opentelemetry = "0.30"
 bollard = "0.18"
 futures-util = "0.3"
 
+# System / OS
+libc = "0.2"
+
 # File watching
 notify = "8"
 notify-debouncer-mini = "0.7"
@@ -65,9 +64,6 @@ notify-debouncer-mini = "0.7"
 # Git
 git2 = "0.19"
 
-# TOML parsing for consistency checks
-cargo_toml = "0.20"
-
 # Testing
 proptest = "1"
 loom = "0.7"
diff --git a/PLAN-THIN-THRUM.md b/PLAN-THIN-THRUM.md
new file mode 100644
index 0000000..0bf1f1d
--- /dev/null
+++ b/PLAN-THIN-THRUM.md
@@ -0,0 +1,451 @@
+# Plan: Thin Thrum — Pipeline Controller over Claude Code
+
+## Executive Summary
+
+Thrum is currently **40,340 lines of Rust** across 5 crates. Most of that code
+reimplements things Claude Code 2.1.71 now does natively. The plan is to strip
+Thrum down to a **thin pipeline controller** (~3-5K lines) that:
+
+1. Manages a **durable task queue** with gated state machine
+2. Spawns Claude Code sessions with **full real-time visibility**
+3. Runs **deterministic gate checks** (cargo test, clippy, Z3, cross-repo)
+4. Provides a **dashboard** showing what every agent is doing right now
+5. Handles **human approval** checkpoints
+
+Everything else — worktree management, sandbox, agent prompts, session
+continuation, retry with memory, file operations — is Claude Code's job.
+
+---
+
+## The Visibility Problem (Solved)
+
+**Current**: Thrum spawns `claude -p "prompt" --output-format json` and waits
+for it to finish. You see nothing until the agent is done (or times out).
+
+**New**: Use `claude -p "prompt" --output-format stream-json --include-partial-messages`
+which emits **real-time NDJSON** with every event:
+
+```jsonl
+{"type":"system","subtype":"init","session_id":"...","tools":["Bash","Read","Edit",...]}
+{"type":"stream_event",...}  // partial token chunks
+{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Edit","input":{...}}]}}
+{"type":"assistant","message":{"content":[{"type":"text","text":"I'll fix the bug in..."}]}}
+{"type":"result","total_cost_usd":0.0653,"num_turns":3,"duration_ms":45000}
+```
+
+Each `tool_use` event shows the tool name and input in real time. Each `text`
+event shows what the agent is thinking/saying. The dashboard can render this
+as live tool-call cards — the exact feature TASK-0053 was trying to build
+manually, and which Claude Code now provides for free.
+
+**Alternative for full interactive visibility**: `claude remote-control --name "TASK-0051"`
+starts a session visible at claude.ai/code. Thrum could open one per agent and
+you'd watch them live in your browser.
+
+---
+
+## Architecture: What Stays, What Goes
+
+### KEEP (Thrum's unique value)
+
+| Component | Lines | Why |
+|-----------|-------|-----|
+| **Task state machine** | ~300 | `Pending→Implementing→Gate1→Reviewing→Gate2→AwaitingApproval→Approved→Integrating→Merged` — durable, survives restarts |
+| **Task queue + DB** | ~700 | redb persistence, claim/dispatch, retry count, dependency tracking |
+| **Gate runner** | ~400 | Run `cargo test`, `cargo clippy`, `cargo fmt`, Z3/Rocq, mutants — deterministic checks, no AI needed |
+| **Integration gate** | ~200 | Cross-repo pipeline (meld→loom→synth), merge-to-main |
+| **Budget tracker** | ~150 | Track spend per task from Claude Code's `total_cost_usd` in result events |
+| **Dashboard + API** | ~1500 | Task list, approval UI, live agent activity from stream events, SSE push |
+| **Pipeline orchestrator** | ~800 | Dispatch loop, claim priority, semaphore, sequential merge queue |
+| **Traceability** | ~200 | Link tasks→branches→commits→gate results for audit |
+| **Total** | **~4,250** | |
+
+### REMOVE (Claude Code does it better)
+
+| Component | Current Lines | Replacement |
+|-----------|--------------|-------------|
+| `subprocess.rs` | 550 | `claude -p --output-format stream-json` — parse NDJSON |
+| `claude.rs` | 200 | Direct `claude` invocation with `--worktree`, `--resume`, `--max-budget-usd` |
+| `worktree.rs` | 200 | `claude --worktree` creates + cleans up worktrees automatically |
+| `sandbox.rs` | 781 | `claude` has built-in seatbelt sandbox |
+| `shutdown.rs` | 695 | PID tracking unnecessary — `claude` manages its own processes |
+| `sync.rs` | 687 | `git fetch/pull` can be simple bash calls, not a module |
+| `coordination_hub.rs` | ~300 | File-lock awareness → Claude Code's hooks system |
+| `watcher.rs` | ~200 | File system watching → unnecessary |
+| `anthropic.rs` | ~300 | Direct API backend → use Claude Code as the only backend |
+| `openai_compat.rs` | ~200 | OpenAI compat backend → remove |
+| `cli_agent.rs` | ~150 | Generic CLI agent → remove |
+| `backend.rs` | ~200 | Backend trait abstraction → single concrete implementation |
+| `session_export.rs` (both) | ~200 | Claude Code has `--resume` and session persistence |
+| `ci.rs` (runner) | 1209 | CI integration → out of scope for thin version |
+| `a2a.rs` (both) | ~1540 | Agent-to-Agent protocol → premature, remove |
+| `consistency.rs` | ~400 | Cross-repo checks → simplify to gate |
+| `convergence.rs` | ~300 | Failure pattern detection → Claude Code's memory handles this |
+| `harness.rs` | ~1100 | Self-improving harness → future work, not core |
+| `safety.rs` | ~300 | TCL/ASIL classification → documentation, not runtime |
+| `sphinx_needs.rs` | ~200 | Requirements tracing → remove |
+| `trust.rs` | ~300 | Trust boundaries → keep as config, remove runtime |
+| `verification.rs` | ~989 | Tag-based verification → simplify |
+| `watch.rs` (cli) | 1392 | TUI dashboard → web dashboard is better |
+| **Total removed** | **~12,400** | |
+
+### SIMPLIFY
+
+| Component | From | To |
+|-----------|------|-----|
+| `event.rs` | 1251 lines, 30+ event kinds | ~100 lines, 5 events: TaskClaimed, GatePassed, GateFailed, AgentStream, TaskMerged |
+| `gate.rs` | 1415 lines | ~400 lines — just run commands, collect pass/fail |
+| `parallel.rs` | 3935 lines (!) | ~800 lines — dispatch loop, invoke claude, parse stream, run gates |
+| `dashboard.rs` | 2620 lines | ~800 lines — task list, approval buttons, live stream viewer |
+| `lib.rs` (api) | 2961 lines | ~500 lines — REST endpoints + SSE |
+| `main.rs` (cli) | 2533 lines | ~400 lines — run, task add/list/approve/reject, status |
+| `role.rs` | ~200 lines | Remove — Claude Code's `--agent` flag replaces role system |
+| `agent.rs` | ~200 lines | Remove — agent prompts go in `.claude/agents/*.md` |
+| `checkpoint.rs` | ~300 lines | Remove — Claude Code's session persistence replaces |
+
+---
+
+## New Claude Code Integration Layer
+
+Replace the entire `backend` trait + `subprocess` + `claude` + `worktree` +
+`sandbox` stack with a single module:
+
+```rust
+/// Spawn a Claude Code session and stream its output.
+///
+/// Returns a stream of AgentEvents parsed from NDJSON.
+pub async fn spawn_agent(
+    task: &Task,
+    repo: &RepoConfig,
+    prompt: &str,
+    budget_usd: f64,
+) -> Result<(JoinHandle<AgentResult>, mpsc::Receiver<AgentEvent>)> {
+    let mut cmd = Command::new("claude");
+    cmd.arg("-p").arg(prompt)
+       .arg("--output-format").arg("stream-json")
+       .arg("--include-partial-messages")
+       .arg("--worktree")                    // Claude creates + cleans worktree
+       .arg("--permission-mode").arg("auto") // or bypassPermissions in sandbox
+       .arg("--max-budget-usd").arg(budget_usd.to_string())
+       .arg("--model").arg("claude-opus-4-6")
+       .current_dir(&repo.path)
+       .env_remove("CLAUDECODE")
+       .env_remove("CLAUDE_CODE_ENTRYPOINT")
+       .stdout(Stdio::piped());
+
+    // Optional: resume previous session on retry
+    if let Some(session_id) = &task.session_id {
+        cmd.arg("--resume").arg(session_id);
+    }
+
+    // Optional: use custom agent definition
+    if let Some(agent) = &task.agent {
+        cmd.arg("--agent").arg(agent);
+    }
+
+    let child = cmd.spawn()?;
+    let stdout = BufReader::new(child.stdout.take().unwrap());
+
+    // Parse NDJSON stream into typed events
+    let (tx, rx) = mpsc::channel(256);
+    let handle = tokio::spawn(async move {
+        let mut lines = stdout.lines();
+        while let Some(line) = lines.next_line().await? {
+            if let Ok(event) = serde_json::from_str::<StreamEvent>(&line) {
+                match &event {
+                    StreamEvent::Init { session_id, .. } => {
+                        // Store session_id for resume on retry
+                        tx.send(AgentEvent::SessionStarted(session_id)).await;
+                    }
+                    StreamEvent::Assistant { message } => {
+                        for content in &message.content {
+                            match content {
+                                Content::ToolUse { name, input } => {
+                                    tx.send(AgentEvent::ToolCall {
+                                        tool: name.clone(),
+                                        input: input.clone(),
+                                    }).await;
+                                }
+                                Content::Text { text } => {
+                                    tx.send(AgentEvent::Text(text.clone())).await;
+                                }
+                            }
+                        }
+                    }
+                    StreamEvent::Result { total_cost_usd, result, .. } => {
+                        return Ok(AgentResult {
+                            output: result.clone(),
+                            cost_usd: *total_cost_usd,
+                            session_id: event.session_id().cloned(),
+                        });
+                    }
+                }
+            }
+        }
+    });
+
+    Ok((handle, rx))
+}
+```
+
+This single function replaces:
+- `subprocess.rs` (550 lines)
+- `claude.rs` (200 lines)
+- `worktree.rs` (200 lines)
+- `sandbox.rs` (781 lines)
+- `shutdown.rs` (695 lines)
+- `backend.rs` (200 lines)
+- Half of `parallel.rs` (~2000 lines)
+
+**~4,600 lines → ~150 lines.**
+
+---
+
+## New Pipeline (simplified parallel.rs)
+
+```
+loop {
+    // 1. Claim next task (priority: RetryableFailed > Approved > Pending)
+    let task = claim_next(&db)?;
+
+    match task.status {
+        Pending | RetryableFailed => {
+            // IMPLEMENT: spawn Claude Code agent
+            let (handle, events) = spawn_agent(&task, &repo, &prompt, budget).await?;
+
+            // Forward events to dashboard via SSE
+            while let Some(event) = events.recv().await {
+                event_bus.emit(AgentEvent(task.id, event));
+            }
+
+            let result = handle.await?;
+            if !result.has_changes() {
+                task.status = Gate1Failed("no changes");
+                continue;
+            }
+
+            // GATE 1: cargo test + clippy + fmt (deterministic, no AI)
+            let gate1 = run_gate_checks(&repo)?;
+            if !gate1.passed {
+                task.status = Gate1Failed(gate1);
+                continue;
+            }
+
+            // REVIEW: spawn Claude Code with reviewer agent
+            let review = spawn_agent(&task, &repo, &review_prompt, 1.0).await?;
+
+            // GATE 2: proof checks if configured
+            let gate2 = run_proof_checks(&repo)?;
+
+            // → AwaitingApproval (human reviews in dashboard)
+            task.status = AwaitingApproval { gate1, review, gate2 };
+        }
+
+        Approved => {
+            // INTEGRATE: merge to main (no AI needed, pure git)
+            merge_to_main(&repo, &task.branch())?;
+            run_integration_gate(&repos)?;  // cross-repo if configured
+            task.status = Merged;
+        }
+    }
+}
+```
+
+---
+
+## Dashboard: Real-Time Agent Visibility
+
+The new dashboard gets **live tool-call streams** for free:
+
+```
+┌─────────────────────────────────────────────────────┐
+│ TASK-0051: Add chat injection                       │
+│ Status: Implementing (3m 22s)     Cost: $0.42       │
+│                                                     │
+│ ┌─ Agent Activity ────────────────────────────────┐ │
+│ │ 🔧 Read crates/thrum-api/src/dashboard.rs       │ │
+│ │ 🔧 Grep "SSE" --type rust                      │ │
+│ │ 💬 "I'll add a POST endpoint that sends..."     │ │
+│ │ 🔧 Edit crates/thrum-api/src/lib.rs  [lines 45] │ │
+│ │ 🔧 Bash cargo test --package thrum-api          │ │
+│ │ ✅ Test passed                                   │ │
+│ │ 🔧 Edit crates/thrum-api/src/dashboard.rs       │ │
+│ │ ...                                    ▼ live   │ │
+│ └─────────────────────────────────────────────────┘ │
+├─────────────────────────────────────────────────────┤
+│ TASK-0053: Streaming tool cards          Pending    │
+│ TASK-0054: Inline config editing         Pending    │
+└─────────────────────────────────────────────────────┘
+```
+
+Each `AgentEvent::ToolCall` from the stream-json output renders as a card.
+Each `AgentEvent::Text` renders as agent commentary. No custom parsing of
+Claude's internal format needed — the stream-json protocol gives us structured
+events.
+
+**Alternative**: For even richer visibility, use `claude remote-control`
+per-agent and embed the claude.ai/code URLs in the dashboard as iframes
+or links. You'd see the full Claude Code UI per agent.
+
+---
+
+## Migration Path
+
+### Phase 1: Stream visibility (immediate value, ~2 days)
+
+1. Change `claude.rs` to use `--output-format stream-json --include-partial-messages`
+2. Parse NDJSON stream, extract tool_use/text/result events
+3. Forward to event bus → SSE → dashboard
+4. Dashboard renders live tool-call cards per agent
+5. **Result**: You can see what every agent is doing in real-time
+
+### Phase 2: Delegate worktree + sandbox (~1 day)
+
+1. Add `--worktree` flag to claude invocation
+2. Remove `worktree.rs`, `sandbox.rs`
+3. Remove seatbelt profile generation
+4. Claude Code manages worktree lifecycle
+
+### Phase 3: Simplify pipeline (~3 days)
+
+1. Remove `backend.rs` trait, `anthropic.rs`, `openai_compat.rs`, `cli_agent.rs`
+2. Inline claude invocation directly in pipeline
+3. Remove `shutdown.rs` PID tracking (Claude manages its own)
+4. Remove agent prompt loading (use `--agent` flag or `.claude/agents/*.md`)
+5. Add `--max-budget-usd` per task instead of manual budget tracking
+6. Use Claude Code's `session_id` from init event for `--resume` on retries
+
+### Phase 4: Cut dead weight (~2 days)
+
+1. Remove `a2a.rs` (both crates), `ci.rs`, `sphinx_needs.rs`
+2. Remove `harness.rs`, `convergence.rs`, `safety.rs`
+3. Remove `consistency.rs` (fold into gate if needed)
+4. Remove `coordination_hub.rs`, `watcher.rs`
+5. Simplify `event.rs` to 5 core events
+6. Remove `watch.rs` TUI (web dashboard is primary)
+
+### Phase 5: Leverage Claude Code plugins (~1 day)
+
+1. Create a `.claude/agents/implementer.md` with Thrum's agent prompt
+2. Create a `.claude/agents/reviewer.md` with Thrum's reviewer prompt
+3. Use `--agent implementer` and `--agent reviewer` instead of
+   loading prompt files and `--system-prompt`
+4. Consider creating a Thrum plugin for Claude Code's plugin system
+
+---
+
+## Ultra-Thin Variant: Agent Teams
+
+Claude Code's agent teams (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`) combine
+tmux + worktree isolation to run multiple agents concurrently. Each teammate
+gets its own tmux pane, its own git worktree, and coordination via hooks
+(`TeammateIdle`, `TaskCompleted`).
+
+This means Thrum's parallel dispatch engine (`parallel.rs` at ~3,935 lines)
+can be replaced by a **task feeder** that:
+1. Pops tasks from the durable queue
+2. Invokes `claude -p --output-format stream-json --worktree` per task
+3. Parses the stream for visibility
+4. Runs deterministic gates after agent completion
+5. Feeds approval results back
+
+Both Thrum and agent teams are experimental — no reason to avoid the leaner
+path. If agent teams stabilize (expected soon), Thrum can optionally delegate
+concurrency entirely. Until then, Thrum manages its own tokio-spawned agents
+using the same `claude -p` invocation.
+
+### Line Count Projection (Ultra-Thin)
+
+| Component | Original | Current | Thin | Ultra-Thin |
+|-----------|----------|---------|------|------------|
+| thrum-core | 13,977 | 13,859 | ~1,500 | ~1,200 |
+| thrum-db | 4,130 | 4,315 | ~1,200 | ~800 |
+| thrum-runner | 11,321 | 10,301 | ~1,200 | ~600 |
+| thrum-api | 6,904 | 6,848 | ~1,800 | ~1,200 |
+| thrum-cli | 4,008 | 3,879 | ~400 | ~300 |
+| **Total** | **40,340** | **39,202** | **~6,100** | **~4,100** |
+
+---
+
+## Execution Progress
+
+The migration happens by **rewriting the core**, then **deleting** what becomes
+unreferenced. Not the reverse — removing modules first would break compilation.
+
+### Step 1: ✅ New `claude_code.rs` integration (DONE)
+
+Created `crates/thrum-runner/src/claude_code.rs` (~390 lines) with:
+- `AgentConfig`, `AgentEvent`, `AgentResult`, `AgentHandle`, `AiResponse` types
+- `spawn_agent()`: invokes `claude -p` with `--output-format stream-json`,
+  parses NDJSON, streams typed events via mpsc channel
+- `invoke_streaming()`: bridge that returns `AiResponse` for pipeline compat
+- `load_agent_prompt()`: reads agent .md files, replaces `{{CLAUDE_MD}}`
+- `health_check()`: runs `claude --version`
+
+### Step 2: ✅ Delete old backend stack (DONE)
+
+Deleted 5 modules (1,578 lines):
+- `backend.rs` (618 LOC) — AiBackend trait, BackendRegistry, AiRequest
+- `claude.rs` (385 LOC) — Claude CLI wrapper via subprocess
+- `anthropic.rs` (193 LOC) — Anthropic Messages API
+- `openai_compat.rs` (191 LOC) — OpenAI/Mistral/custom providers
+- `cli_agent.rs` (191 LOC) — generic CLI agent wrapper
+
+Also migrated `ci.rs::dispatch_ci_fixer()` and `main.rs::invoke_planner()`
+to use `claude_code::invoke_streaming()`. Removed unused `async-openai` and
+`reqwest` dependencies.
+
+**Current LOC**: 39,202 (down from 40,340 — net -1,138)
+
+### Step 3: Simplify deeply-integrated modules (IN PROGRESS)
+
+Analysis revealed most thrum-core modules are deeply woven into the pipeline:
+- `spec.rs`, `verification.rs`, `convergence.rs`, `harness.rs` → used by
+  gate execution and task state
+- `trust.rs`, `traceability.rs`, `memory.rs` → used by task, API, dashboard
+- `telemetry.rs`, `a2a.rs` → used by CLI, API
+- `subsample.rs` → used by gate.rs
+
+Only `consistency.rs` and `safety.rs` are truly isolated (CLI-only). Mass
+deletion would require cascading changes across all crates.
+
+**Next actions**:
+1. Simplify `parallel.rs` — remove sandbox profile creation, observer mode,
+   and watcher setup that are no longer needed in the thin architecture
+2. Remove `consistency.rs` and `safety.rs` with their CLI commands
+3. Simplify `shutdown.rs` — the old ProcessTracker was designed for managing
+   ClaudeCliBackend subprocesses; now it just needs startup recovery
+4. Make `a2a.rs`, `sphinx_needs.rs`, `harness.rs` optional (feature-gated)
+
+### Step 4: Simplify what remains (TODO)
+
+- `event.rs`: 1,251 → ~100 lines (5 events)
+- `gate.rs`: 1,415 → ~400 lines (just run commands)
+- `dashboard.rs`: 2,620 → ~800 lines (task list, approval, live stream)
+- `main.rs`: 2,533 → ~400 lines (run, task, status)
+
+---
+
+## What You Gain
+
+1. **Visibility**: See every tool call, every file edit, every bash command
+   in real-time on the dashboard
+2. **Reliability**: Claude Code's worktree/sandbox/session management is
+   battle-tested across millions of users. Our custom implementations had
+   bugs (empty-branch false positive, stuck integrating, merge conflicts)
+3. **Free upgrades**: Every Claude Code release improves your agents —
+   memory, plugins, better prompting, performance
+4. **Less maintenance**: 90% less code = 90% fewer bugs to fix
+5. **Plugin ecosystem**: Use `/code-review`, `/feature-dev`, `security-guidance`
+   hooks — all free
+6. **Budget accuracy**: `total_cost_usd` from stream-json is the real cost,
+   not our estimates
+
+## What You Lose
+
+1. **Backend flexibility**: No more swapping to OpenAI/Anthropic API directly.
+   Claude Code is the only backend. (Acceptable — it's better anyway)
+2. **Fine-grained process control**: Claude Code manages its own processes.
+   Less control over timeout behavior. (Mitigated by `--max-budget-usd`)
+3. **A2A protocol**: The agent-to-agent protocol gets removed. (Premature
+   anyway — revisit when Claude Code's agent teams mature)
diff --git a/agents/ci_fixer.md b/agents/ci_fixer.md
new file mode 100644
index 0000000..ec9526d
--- /dev/null
+++ b/agents/ci_fixer.md
@@ -0,0 +1,40 @@
+# CI Fix Agent
+
+You are a CI Fix Agent for the Thrum autonomous development pipeline.
+Your sole job is to fix CI failures on a pull request branch.
+
+## Context
+
+{{CLAUDE_MD}}
+
+## Process
+
+1. **Read the CI failure logs** provided in the prompt carefully
+2. **Identify the root cause** — build error, test failure, lint issue, type error, etc.
+3. **Make the minimum necessary fix** — only change what's needed to make CI pass
+4. **Run relevant checks locally** to verify your fix before committing:
+   - `cargo fmt --check` for formatting issues
+   - `cargo clippy` for lint issues
+   - `cargo test` for test failures
+   - `cargo build` for build errors
+5. **Commit the fix** with a clear message like `fix: resolve CI failure in <component>`
+
+## Rules
+
+- Make **MINIMAL** changes — only fix the CI failure
+- Do **NOT** refactor, add features, or restructure code
+- Do **NOT** modify CI configuration unless the config itself is the bug
+- Do **NOT** change test expectations unless the test is genuinely wrong
+- If the fix requires understanding broader context, read the relevant source files first
+- Commit your fix before exiting — uncommitted changes will be lost
+
+## Common CI Failures
+
+- **cargo fmt**: Run `cargo fmt` to auto-fix formatting
+- **cargo clippy**: Read the clippy suggestion and apply the recommended fix
+- **cargo test**: Read the test failure, understand the assertion, fix the code (or the test, only if the test itself is genuinely wrong)
+- **cargo build**: Read the compiler error, fix the type/lifetime/borrow issue
+
+## Output
+
+After fixing, briefly summarize what you changed and why.
diff --git a/agents/implementer_thrum.md b/agents/implementer_thrum.md
new file mode 100644
index 0000000..54c672f
--- /dev/null
+++ b/agents/implementer_thrum.md
@@ -0,0 +1,41 @@
+# Thrum Implementer
+
+You are the Implementation Agent for the **thrum** orchestration engine.
+You implement tasks by writing code and tests following thrum's conventions exactly.
+
+## Target Repo Conventions
+
+The following is the complete CLAUDE.md for the thrum repository. Follow
+every instruction precisely.
+
+{{CLAUDE_MD}}
+
+## Implementation Workflow
+
+1. Read the task description and acceptance criteria carefully
+2. Understand the existing crate structure before making changes:
+   - `thrum-core`: Domain types (Task, Gate, Repo, Budget)
+   - `thrum-db`: Persistence via redb
+   - `thrum-runner`: Subprocess management, parallel engine, sandbox
+   - `thrum-api`: HTTP API and web dashboard
+   - `thrum-cli`: CLI binary
+3. Write the implementation in the appropriate crate
+4. Write tests for new functionality
+5. Run `cargo fmt` to format code
+6. Run `cargo clippy --workspace --tests -- -D warnings` and fix warnings
+7. Run `cargo test --workspace` to verify all tests pass
+8. **Commit your work**: `git add -A && git commit -m "descriptive message"`
+   - You MUST commit before finishing. Uncommitted work is lost.
+   - A pre-commit hook will run cargo fmt and clippy. If it fails, fix the issues and try again.
+   - Do NOT use `--no-verify` — the hook exists to catch problems early.
+
+## Working Directory
+
+Your current working directory IS the repo root. All source files are here.
+Do NOT navigate to any other directory or use absolute paths from CLAUDE.md
+or config files. Stay in your current working directory for ALL operations.
+
+## Branch Convention
+
+You are working on a branch created by thrum. Make commits with
+clear messages describing what changed and why.
diff --git a/agents/planner.md b/agents/planner.md
index bcc6de6..409c6b8 100644
--- a/agents/planner.md
+++ b/agents/planner.md
@@ -21,9 +21,36 @@ produce a prioritized queue of implementation tasks.
    - **Title**: Clear, imperative description
    - **Repo**: Which repo this targets
    - **Description**: What needs to change and why
-   - **Acceptance criteria**: Specific, testable conditions
+   - **Acceptance criteria**: Specific, testable conditions with verification tags
    - **Requirement ID**: If traceable to a formal requirement
 
+## Verification-Tagged Acceptance Criteria
+
+Every acceptance criterion MUST have a verification tag specifying HOW it will be
+verified. If it matters, there must be a concrete, automated verification mechanism.
+"Hope someone reads the code" is not acceptable.
+
+Valid tags:
+- **(TEST)** — Verified by automated tests (unit, integration, property-based)
+- **(LINT)** — Verified by linting / static analysis (clippy, eslint, etc.)
+- **(BENCH)** — Verified by benchmarks / performance tests
+- **(MANUAL)** — Requires manual human verification
+- **(BROWSER)** — Verified by browser / UI testing
+- **(SECURITY)** — Verified by security audit / scanning
+
+Each criterion must be:
+1. **Concrete** — not vague ("make it better" is rejected)
+2. **Measurable** — clear pass/fail condition
+3. **Tagged** — ends with a verification tag in parentheses
+
+Examples:
+- "All unit tests pass including new coverage (TEST)"
+- "No clippy warnings on the changed crate (LINT)"
+- "P99 latency below 50ms on /api/tasks (BENCH)"
+- "Dashboard shows per-criterion verification status (BROWSER)"
+- "No known CVEs in dependency tree (SECURITY)"
+- "Architecture documentation reviewed by maintainer (MANUAL)"
+
 ## Priority Rules
 1. P0: Cross-repo consistency (version drift, unpinned deps)
 2. P0: Blocking integration (e.g., shared type definitions)
@@ -32,19 +59,45 @@ produce a prioritized queue of implementation tasks.
 5. P3: Quality improvements, documentation
 
 ## Output Format
-Produce a JSON array of task objects:
+Produce a JSON array of task objects. Every acceptance criterion must include
+a verification tag:
 ```json
 [
   {
     "repo": "loom",
     "title": "Add i32.popcnt to ISLE pipeline",
     "description": "...",
-    "acceptance_criteria": ["..."],
+    "acceptance_criteria": [
+      "cargo test passes with new popcnt tests (TEST)",
+      "No clippy warnings (LINT)",
+      "Z3 translation validation proof added (TEST)"
+    ],
     "requirement_id": "REQ-LOOM-042"
   }
 ]
 ```
 
+## Trust Boundaries & Risk Assessment
+
+Repositories may have trust boundary configurations in `[repo.trust]` that classify
+files by risk level. When planning tasks, consider the trust implications:
+
+- **high_risk** files (e.g. `src/crypto/**`, `Cargo.lock`): Changes CANNOT be
+  auto-approved and must go through manual human review. Plan extra time.
+- **security_sensitive** files (e.g. `Cargo.toml`, `build.rs`, `.github/**`):
+  Changes trigger extra security checks (cargo-audit, cargo-deny). May add latency.
+- **auto_ok** files (e.g. `docs/**`, `*.md`): Safe for fast-path approval.
+
+When producing tasks, include a `risk_assessment` field if trust boundaries apply:
+```json
+{
+  "repo": "loom",
+  "title": "Update cryptographic key derivation",
+  "risk_assessment": "HIGH — touches src/crypto/** (trust:high_risk)",
+  "description": "..."
+}
+```
+
 ## Cross-Repo Awareness
 - Changes to shared types (Instruction/WasmOp enums) need coordinated tasks
 - wasmparser upgrades must be synced across all repos
diff --git a/configs/pipeline.toml b/configs/pipeline.toml
index 82b129f..b3b3292 100644
--- a/configs/pipeline.toml
+++ b/configs/pipeline.toml
@@ -9,7 +9,7 @@
 # agents can work concurrently on the same repo without index conflicts.
 
 [engine]
-per_repo_limit = 3
+per_repo_limit = 4
 worktrees_dir = "worktrees"
 max_retries = 10  # Reset via dashboard retry button to give a task another round
 
@@ -72,7 +72,7 @@ checksums = "sha256"
 # Overall spending ceiling and per-session timeout for AI agents.
 
 [budget]
-ceiling_usd = 1000.0
+ceiling_usd = 3000.0
 per_session_timeout_secs = 600
 
 [budget.allocation]
@@ -98,7 +98,7 @@ type = "agent"
 command = "claude"
 prompt_args = ["-p", "{prompt}", "--output-format", "json"]
 model = "claude-opus-4-6"
-timeout_secs = 1200
+timeout_secs = 2400
 enabled = true
 
 # Uncomment to add OpenCode as an alternative agent:
@@ -149,34 +149,56 @@ enabled = true
 # Map pipeline stages to AI backends and prompt templates.
 # Backend values reference a registered backend by name or model substring.
 # e.g., "opus" resolves to any backend whose model contains "opus".
+#
+# timeout_recovery: what to do when an agent invocation times out:
+#   "retry"  — Resume from checkpoint (session continuation). Best for implementers.
+#   "skip"   — Skip the timed-out step. For reviewers: auto-approve with note.
+#   "extend" — Double the timeout and retry once. Falls back to fail.
+#   "fail"   — Treat timeout as failure (default).
 
 [roles.implementer]
 backend = "opus"
 prompt_template = "agents/implementer.md"
 budget_usd = 6.0
-timeout_secs = 1200
+timeout_secs = 2400
+timeout_recovery = "retry"    # Resume from checkpoint on timeout
 
 [roles.reviewer]
 backend = "sonnet"
 prompt_template = "agents/reviewer.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "skip"     # Auto-approve with "review-skipped-timeout" note
 
 [roles.planner]
 backend = "opus"
 prompt_template = "agents/planner.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "fail"     # Planning timeout = real failure
+
+[roles.ci_fixer]
+backend = "opus"
+prompt_template = "agents/ci_fixer.md"
+budget_usd = 3.0
+timeout_secs = 600
+timeout_recovery = "retry"    # Resume CI fix from checkpoint
 
 # ── Sandbox ───────────────────────────────────────────────────────────
 # Resource limits for agent subprocess execution.
-# backend: "none" (no isolation), "docker", "nsjail", etc.
+# backend:
+#   "none"      — no isolation (passthrough)
+#   "os-native" — enforce seatbelt (macOS) / bubblewrap (Linux)
+#   "observe"   — run without enforcement, audit writes after execution
+#                  and log which operations WOULD be denied. Useful for
+#                  debugging sandbox profiles before enabling enforcement.
+#   "docker"    — Docker container isolation
 
 [sandbox]
-backend = "none"
+backend = "os-native"
 memory_limit_mb = 4096
 cpu_limit = 2.0
-network = false
+network = true
 
 # ── Subsampling ───────────────────────────────────────────────────────
 # Run a fraction of gate checks to speed up iteration.
diff --git a/crates/thrum-api/Cargo.toml b/crates/thrum-api/Cargo.toml
index af401fb..6ba6a76 100644
--- a/crates/thrum-api/Cargo.toml
+++ b/crates/thrum-api/Cargo.toml
@@ -27,3 +27,4 @@ futures-util = { workspace = true }
 tempfile = "3"
 reqwest = { workspace = true }
 tokio = { workspace = true }
+tokio-tungstenite = "0.26"
diff --git a/crates/thrum-api/assets/dashboard.html b/crates/thrum-api/assets/dashboard.html
index 9c1ed60..bd52764 100644
--- a/crates/thrum-api/assets/dashboard.html
+++ b/crates/thrum-api/assets/dashboard.html
@@ -3,8 +3,10 @@
 <head>
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link rel="icon" type="image/svg+xml" href="/dashboard/assets/favicon.svg">
     <title>Thrum Dashboard</title>
     <link rel="stylesheet" href="/dashboard/assets/style.css">
+    <link rel="stylesheet" href="/dashboard/assets/help.css">
     <script src="https://unpkg.com/htmx.org@2.0.4"
             integrity="sha384-HGfztofotfshcF7+8n44JQL2oJmowVChPTg48S+jvZoztPfvwD79OC/LTtG6dMp+"
             crossorigin="anonymous"></script>
@@ -18,33 +20,68 @@ <h1>thrum</h1>
                 <span class="connection-dot" id="conn-dot"></span>
                 <span class="htmx-indicator pulse" id="poll-indicator"></span>
                 dashboard
+                <a href="/dashboard/help" class="header-help-link" title="Pipeline reference and documentation">?</a>
             </div>
         </header>
 
-        <!-- Budget Usage — polls every 15s, morph preserves form state -->
+        <!-- Budget Usage — event-driven, refreshes on BudgetUpdated events -->
         <div id="budget-bar"
              hx-get="/dashboard/partials/budget"
-             hx-trigger="load, every 15s"
+             hx-trigger="load, refreshBudget"
              hx-swap="morph:innerHTML"
              hx-indicator="#poll-indicator">
         </div>
 
-        <!-- Status Counts — polls every 10s -->
+        <!-- Status Counts — event-driven, refreshes on task state changes -->
         <div id="status-counts"
              hx-get="/dashboard/partials/status"
-             hx-trigger="load, every 10s, refreshNow"
+             hx-trigger="load, refreshStatus"
              hx-swap="morph:innerHTML"
              hx-indicator="#poll-indicator">
         </div>
 
-        <!-- Task Queue — polls every 15s, morph preserves dropdowns/checkboxes -->
+        <!-- Pipeline Legend — collapsible key showing pipeline flow -->
+        <details class="pipeline-legend">
+            <summary>Pipeline Legend &mdash; hover timeline steps for details</summary>
+            <div class="legend-content">
+                <div class="legend-flow">
+                    <a href="/dashboard/help#pending" class="timeline-step" title="Pending: Task is queued">P</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#implementing" class="timeline-step active" title="Implementing: Agent writing code">I</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#gate1" class="timeline-step" title="Gate 1: Quality checks (fmt, clippy, test)">G1</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#reviewing" class="timeline-step" title="Reviewing: AI reviewer analyzing code">R</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#gate2" class="timeline-step" title="Gate 2: Proof checks (Z3, Rocq)">G2</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#approval" class="timeline-step" title="Awaiting Approval: Needs human review">A</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#integrating" class="timeline-step" title="Integrating: Merging into target branch">Int</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#ci" class="timeline-step" title="Awaiting CI: PR pushed, waiting for CI">CI</a>
+                    <span class="flow-arrow">&rarr;</span>
+                    <a href="/dashboard/help#merged" class="timeline-step done" title="Merged: Task complete">M</a>
+                </div>
+                <div class="legend-colors">
+                    <div><span class="timeline-step">P</span> Not reached</div>
+                    <div><span class="timeline-step done">P</span> Completed</div>
+                    <div><span class="timeline-step active">I</span> Active</div>
+                    <div><span class="timeline-step failed">G1</span> Failed</div>
+                    <a href="/dashboard/help" class="legend-help-link">Full pipeline docs &rarr;</a>
+                </div>
+            </div>
+        </details>
+
+        <!-- Task Queue — event-driven, refreshes on task state changes -->
         <div class="section">
-            <h2>Task Queue</h2>
+            <h2 title="All tasks in the pipeline with their current status, retry count, and available actions. Tasks move through: Pending → Implementing → Gate 1 → Review → Gate 2 → Approval → Integration → Merged.">Task Queue</h2>
+            <p class="section-description">Tasks progressing through the pipeline — click a row for details</p>
             <!-- Action result lives OUTSIDE the morphed container so morph cycles don't clear it -->
             <div id="task-action-result"></div>
             <div id="task-table"
                  hx-get="/dashboard/partials/tasks"
-                 hx-trigger="load, every 15s, refreshNow"
+                 hx-trigger="load, refreshTasks"
                  hx-swap="morph:innerHTML"
                  hx-indicator="#poll-indicator">
             </div>
@@ -52,29 +89,54 @@ <h2>Task Queue</h2>
 
         <!-- Agent Activity — real-time via SSE -->
         <div class="section">
-            <h2>Agent Activity <span class="section-badge" id="agent-count"></span></h2>
+            <h2 title="Live view of AI agents working on tasks. Each card shows the agent's current pipeline stage, file changes, and real-time output. Updates automatically via Server-Sent Events.">Agent Activity <span class="section-badge" id="agent-count"></span></h2>
+            <p class="section-description">Live AI agent sessions — cards update in real-time as agents implement, review, and gate-check tasks</p>
             <div id="agent-grid" class="agent-grid">
                 <div class="empty" id="no-agents">Waiting for agent events&hellip;</div>
             </div>
         </div>
 
-        <!-- Memory Entries — polls every 30s, morph preserves form inputs -->
+        <!-- Remote Sync Controls -->
+        <div class="section">
+            <h2 title="Trigger a git fetch + rebase for repository branches. Pulls upstream changes from the remote and rebases active task branches. Conflicts are resolved by a rebase agent.">Remote Sync</h2>
+            <p class="section-description">Fetch upstream changes and rebase active task branches onto updated main</p>
+            <div class="sync-controls">
+                <input type="text" id="sync-repo" class="sync-input" placeholder="repo name (e.g. loom)">
+                <button class="btn btn-sync" onclick="triggerSync()">Sync Now</button>
+            </div>
+            <div id="sync-log" class="event-log"></div>
+        </div>
+
+        <!-- Memory Entries — event-driven, refreshes on MemoryUpdated events -->
         <div class="section">
-            <h2>Memory</h2>
+            <h2 title="Persistent context entries used by agents across retries. Stores error patterns, architectural decisions, and contextual hints so agents learn from previous failures instead of repeating mistakes.">Memory</h2>
+            <p class="section-description">Persistent context for agents — error patterns, decisions, and hints that carry across retries</p>
             <div id="memory-section"
                  hx-get="/dashboard/partials/memory"
-                 hx-trigger="load, every 30s"
+                 hx-trigger="load, refreshMemory"
                  hx-swap="morph:innerHTML"
                  hx-indicator="#poll-indicator">
             </div>
         </div>
 
-        <!-- Activity Log — recent traces polled + live events via SSE -->
+        <!-- Traceability — event-driven, refreshes on task state changes -->
         <div class="section">
-            <h2>Activity Log</h2>
+            <h2>Traceability</h2>
+            <div id="traceability-section"
+                 hx-get="/dashboard/partials/traceability"
+                 hx-trigger="load, refreshTraceability"
+                 hx-swap="morph:innerHTML"
+                 hx-indicator="#poll-indicator">
+            </div>
+        </div>
+
+        <!-- Activity Log — event-driven, refreshes on pipeline events -->
+        <div class="section">
+            <h2 title="Pipeline events showing gate results (pass/fail), task state transitions, errors, and CI status updates. Generic infrastructure messages are filtered out — only meaningful pipeline activity is shown.">Pipeline Events</h2>
+            <p class="section-description">Gate results, state transitions, and errors — filtered to meaningful pipeline activity</p>
             <div id="activity-log"
                  hx-get="/dashboard/partials/activity"
-                 hx-trigger="load, every 10s"
+                 hx-trigger="load, refreshActivity"
                  hx-swap="morph:innerHTML"
                  hx-indicator="#poll-indicator">
             </div>
@@ -98,38 +160,219 @@ <h3>Reject Task</h3>
     </div>
 
     <script>
+
+    // ── Form Protection Layer ────────────────────────────────────
+    // Prevents HTMX morph refreshes from destroying active user input.
+
+    // True when refreshing `sectionId` would clobber in-progress user input:
+    // a focused form control or a ticked checkbox inside the section, or an
+    // open modal anywhere on the page. An unknown section id is never "active".
+    function sectionHasActiveForm(sectionId) {
+        var container = document.getElementById(sectionId);
+        if (!container) return false;
+
+        var focused = document.activeElement;
+        var editing = container.contains(focused) &&
+            ['INPUT', 'TEXTAREA', 'SELECT'].indexOf(focused.tagName) !== -1;
+        if (editing) return true;
+
+        // Ticked bulk-action checkboxes count as pending input.
+        if (container.querySelector('input[type="checkbox"]:checked')) return true;
+
+        // Modals are page-wide, so this check is deliberately not scoped to the section.
+        return document.querySelector('.modal-backdrop.active') !== null;
+    }
+
+    function saveCheckboxState(sectionId) {
+        var section = document.getElementById(sectionId);
+        if (!section) return {};
+        var state = {};
+        section.querySelectorAll('input[type="checkbox"]').forEach(function(cb) {
+            if (cb.checked && cb.name) {
+                state[cb.name + '_' + cb.value] = true;
+            }
+        });
+        return state;
+    }
+
+    function restoreCheckboxState(sectionId, state) {
+        var section = document.getElementById(sectionId);
+        if (!section) return;
+        section.querySelectorAll('input[type="checkbox"]').forEach(function(cb) {
+            var key = cb.name + '_' + cb.value;
+            if (state[key]) cb.checked = true;
+        });
+    }
+
+    // Fire `eventName` on the section via htmx, or park it in window._pendingRefreshes
+    // (one slot per section, last event wins) while the section has active input.
+    function safeRefreshSection(sectionId, eventName) {
+        if (!sectionHasActiveForm(sectionId)) {
+            var target = document.getElementById(sectionId);
+            if (target) htmx.trigger(target, eventName);
+            return;
+        }
+        (window._pendingRefreshes = window._pendingRefreshes || {})[sectionId] = eventName;
+    }
+
+    // Replay refreshes that safeRefreshSection() deferred. Sections that are
+    // still busy are queued again rather than dropped, so the update is not lost.
+    function flushPendingRefreshes() {
+        if (!window._pendingRefreshes) return;
+        var pending = window._pendingRefreshes;
+        window._pendingRefreshes = {};
+        Object.keys(pending).forEach(function(sectionId) {
+            // Triggers the event now, or re-parks it if the form is still active.
+            safeRefreshSection(sectionId, pending[sectionId]);
+        });
+    }
+
+    // Flush pending refreshes when user finishes interacting with forms
+    document.addEventListener('focusout', function() {
+        setTimeout(flushPendingRefreshes, 100);
+    });
+
+    // ── Surgical Update Functions ────────────────────────────────
+    // Update specific data without full HTML round-trips.
+
+    // Patch the numbers inside #status-counts from the JSON status endpoint, leaving its markup alone.
+    function updateStatusCountsSurgically() {
+        fetch('/dashboard/api/status')
+            .then(function(r) {
+                // An error response would otherwise be rendered into the counters as "undefined".
+                if (!r.ok) throw new Error('Server returned ' + r.status);
+                return r.json();
+            })
+            .then(function(data) {
+                var el = document.getElementById('status-counts');
+                // Only update if the section isn't being interacted with
+                if (!el || !data || sectionHasActiveForm('status-counts')) return;
+                var keys = ['pending', 'active', 'approval', 'merged', 'failed']; // first match wins
+                el.querySelectorAll('.stat-value').forEach(function(span) {
+                    var label = span.previousElementSibling;
+                    if (!label) return;
+                    var text = label.textContent.toLowerCase().trim();
+                    var key = keys.filter(function(k) { return text.includes(k); })[0];
+                    if (key && data[key] != null) span.textContent = data[key];
+                });
+            })
+            .catch(function() {}); // silently fail — next event will retry
+    }
+
+    // Update the budget bar's fill width and "$spent / $ceiling" label from the JSON budget endpoint.
+    function updateBudgetSurgically() {
+        fetch('/dashboard/api/budget')
+            .then(function(r) { if (!r.ok) throw new Error('Server returned ' + r.status); return r.json(); })
+            .then(function(data) {
+                var bar = document.getElementById('budget-bar'), spent = parseFloat(data.spent), ceiling = parseFloat(data.ceiling);
+                if (!bar || !isFinite(spent) || !isFinite(ceiling)) return; // malformed payload: leave the bar as-is
+                var pct = ceiling > 0 ? Math.max(0, Math.min(spent / ceiling * 100, 100)) : 0;
+                var fill = bar.querySelector('.budget-fill'), label = bar.querySelector('.budget-label');
+                if (fill) fill.style.width = pct + '%';
+                if (label) label.textContent = '$' + spent.toFixed(2) + ' / $' + ceiling.toFixed(2);
+            })
+            .catch(function() {});
+    }
+
+    // ── Debounced Refresh Timers ─────────────────────────────────
+    // Batch rapid events to avoid excessive DOM updates.
+
+    var _taskRefreshTimer = null;
+    var _traceRefreshTimer = null;
+    var _activityRefreshTimer = null;
+
+    function refreshAllSections() {
+        safeRefreshSection('task-table', 'refreshTasks');
+        safeRefreshSection('status-counts', 'refreshStatus');
+        safeRefreshSection('traceability-section', 'refreshTraceability');
+    }
+
+    function debouncedActivityRefresh() {
+        if (_activityRefreshTimer) clearTimeout(_activityRefreshTimer);
+        _activityRefreshTimer = setTimeout(function() {
+            _activityRefreshTimer = null;
+            safeRefreshSection('activity-log', 'refreshActivity');
+        }, 2000);
+    }
+
     // ── Task Actions (via fetch, not hx-post — morph-safe) ─────
     // Buttons rendered inside the morphed #task-table use onclick+fetch
     // because idiomorph morph cycles don't reliably bind hx-* attributes.
 
-    function taskAction(url, body) {
+    function taskAction(url, body, triggerBtn) {
         var opts = { method: 'POST' };
         if (body) {
             opts.headers = { 'Content-Type': 'application/x-www-form-urlencoded' };
             opts.body = body;
         }
+        // Disable the triggering button to prevent double-clicks and show feedback
+        if (triggerBtn) {
+            triggerBtn.disabled = true;
+            triggerBtn.dataset.originalText = triggerBtn.textContent;
+            triggerBtn.textContent = '\u2026';
+        }
         fetch(url, opts)
-            .then(function(r) { return r.text(); })
+            .then(function(r) {
+                if (!r.ok) {
+                    throw new Error('Server returned ' + r.status + ' ' + r.statusText);
+                }
+                return r.text();
+            })
             .then(function(html) {
-                var el = document.getElementById('task-action-result');
-                el.innerHTML = html;
-                setTimeout(function() { el.innerHTML = ''; }, 5000);
-                htmx.trigger(document.getElementById('task-table'), 'refreshNow');
-                htmx.trigger(document.getElementById('status-counts'), 'refreshNow');
+                showActionResult(html);
+                safeRefreshSection('task-table', 'refreshTasks');
+                safeRefreshSection('status-counts', 'refreshStatus');
+            })
+            .catch(function(err) {
+                // Build error message safely using DOM methods (no raw innerHTML)
+                var errDiv = document.createElement('div');
+                errDiv.className = 'action-result error';
+                errDiv.textContent = 'Action failed: ' + err.message;
+                showActionResult(errDiv.outerHTML);
+            })
+            .finally(function() {
+                if (triggerBtn) {
+                    triggerBtn.disabled = false;
+                    if (triggerBtn.dataset.originalText) {
+                        triggerBtn.textContent = triggerBtn.dataset.originalText;
+                    }
+                }
             });
     }
 
-    function retryTask(taskId) {
-        taskAction('/dashboard/tasks/' + taskId + '/retry');
+    // Show a result message in the action-result area.
+    // Banners persist until the user dismisses them (no auto-clear timeout).
+    function showActionResult(html) {
+        var el = document.getElementById('task-action-result');
+        if (!el) return;
+        el.innerHTML = html;
+        addDismissButton(el);
+    }
+
+    function addDismissButton(container) {
+        var results = container.querySelectorAll('.action-result');
+        results.forEach(function(r) {
+            if (r.querySelector('.action-dismiss')) return;
+            var btn = document.createElement('button');
+            btn.className = 'action-dismiss';
+            btn.textContent = '\u00d7';
+            btn.title = 'Dismiss';
+            btn.onclick = function() { r.remove(); };
+            r.appendChild(btn);
+        });
+    }
+
+    function retryTask(taskId, btn) {
+        taskAction('/dashboard/tasks/' + taskId + '/retry', null, btn);
     }
 
     function setTaskStatus(taskId, status) {
         taskAction('/dashboard/tasks/' + taskId + '/status', 'status=' + encodeURIComponent(status));
     }
 
-    function deleteTask(taskId) {
+    function deleteTask(taskId, btn) {
         if (confirm('Delete TASK-' + String(taskId).padStart(4, '0') + '?')) {
-            taskAction('/dashboard/tasks/' + taskId + '/delete');
+            taskAction('/dashboard/tasks/' + taskId + '/delete', null, btn);
         }
     }
 
@@ -156,32 +399,130 @@ <h3>Reject Task</h3>
         }
     });
 
-    // ── SSE for Agent Activity & Event Stream ───────────────────
+    // ── WebSocket + SSE Fallback for Agent Activity & Event Stream ──
     var agents = {};
     var MAX_LOG_LINES = 200;
     var MAX_AGENT_LOG = 50;
 
-    var evtSource = new EventSource('/api/v1/events/stream');
+    // Connection state
+    var ws = null;
+    var evtSource = null;
+    var wsReconnectDelay = 1000;
+    var wsMaxReconnectDelay = 30000;
+    var wsReconnectTimer = null;
+    var usingWebSocket = false;
+
+    // Reflect stream connectivity on the header's #conn-dot indicator.
+    function setConnected(connected) {
+        var dot = document.getElementById('conn-dot');
+        // Called from WebSocket/SSE callbacks: a missing dot must not throw there.
+        if (!dot) return;
+        // Coerce to a real boolean; toggle() treats an undefined force as "flip".
+        var isUp = !!connected;
+        dot.classList.toggle('connected', isUp);
+        dot.classList.toggle('disconnected', !isUp);
+    }
 
-    evtSource.addEventListener('pipeline_event', function(e) {
-        var event = JSON.parse(e.data);
-        handleEvent(event);
-    });
+    // ── WebSocket connection ────────────────────────────────────
+    // Open the /ws event stream; falls back to SSE if the first attempt fails and retries with backoff.
+    function connectWebSocket() {
+        var url = (location.protocol === 'https:' ? 'wss:' : 'ws:') + '//' + location.host + '/ws';
+
+        try {
+            ws = new WebSocket(url);
+        } catch (e) {
+            // WebSocket not available, fall back to SSE
+            connectSSE();
+            return;
+        }
 
-    evtSource.addEventListener('lagged', function(e) {
-        var info = JSON.parse(e.data);
-        appendLog('warn', 'Skipped ' + info.skipped + ' events (client lagged)');
-    });
+        ws.onopen = function() {
+            usingWebSocket = true;
+            wsReconnectDelay = 1000; // reset backoff on success
+            setConnected(true);
+            // Close any SSE fallback that might be running
+            if (evtSource) {
+                evtSource.close();
+                evtSource = null;
+            }
+        };
+
+        ws.onmessage = function(e) {
+            var msg;
+            try { msg = JSON.parse(e.data); } catch (_) { return; }
+
+            if (msg.type === 'event') {
+                handleEvent(msg.data);
+            } else if (msg.type === 'lagged') {
+                appendLog('warn', 'Skipped ' + msg.skipped + ' events (client lagged)');
+            } else if (msg.type === 'pong') {
+                // heartbeat response, no action needed
+            } else if (msg.type === 'error') {
+                appendLog('error', 'WS error: ' + msg.message);
+            }
+        };
+
+        ws.onclose = function() {
+            // While the SSE fallback owns the stream, a failed retry must not mark it disconnected.
+            if (!evtSource) setConnected(false);
+            ws = null;
+            // Reconnect with exponential backoff
+            wsReconnectTimer = setTimeout(function() {
+                wsReconnectDelay = Math.min(wsReconnectDelay * 2, wsMaxReconnectDelay);
+                connectWebSocket();
+            }, wsReconnectDelay);
+        };
+
+        ws.onerror = function() {
+            // onerror is always followed by onclose, which handles reconnect.
+            // If no WebSocket has connected yet, fall back to SSE immediately.
+            if (!usingWebSocket && !evtSource) {
+                if (wsReconnectTimer) clearTimeout(wsReconnectTimer);
+                ws.close();
+                ws = null;
+                connectSSE();
+            }
+        };
+    }
+
+    // ── SSE fallback ────────────────────────────────────────────
+    function connectSSE() {
+        if (evtSource) return; // already connected
 
-    evtSource.onopen = function() {
-        document.getElementById('conn-dot').classList.add('connected');
-        document.getElementById('conn-dot').classList.remove('disconnected');
-    };
+        evtSource = new EventSource('/api/v1/events/stream');
 
-    evtSource.onerror = function() {
-        document.getElementById('conn-dot').classList.remove('connected');
-        document.getElementById('conn-dot').classList.add('disconnected');
-    };
+        evtSource.addEventListener('pipeline_event', function(e) {
+            var event = JSON.parse(e.data);
+            handleEvent(event);
+        });
+
+        evtSource.addEventListener('lagged', function(e) {
+            var info = JSON.parse(e.data);
+            appendLog('warn', 'Skipped ' + info.skipped + ' events (client lagged)');
+        });
+
+        evtSource.onopen = function() {
+            setConnected(true);
+        };
+
+        evtSource.onerror = function() {
+            setConnected(false);
+        };
+    }
+
+    // ── Send command via WebSocket ──────────────────────────────
+    // Send {command, payload?} as JSON over the live WebSocket.
+    // Returns true if sent, false when no socket is open; a falsy payload is omitted.
+    function wsSendCommand(command, payload) {
+        var isOpen = !!ws && ws.readyState === WebSocket.OPEN;
+        if (!isOpen) return false;
+        var envelope = payload ? { command: command, payload: payload } : { command: command };
+        ws.send(JSON.stringify(envelope));
+        return true;
+    }
+
+    // Start connection (WebSocket first, SSE as fallback)
+    connectWebSocket();
 
     // ── Event Router ────────────────────────────────────────────
     function handleEvent(event) {
@@ -192,6 +533,9 @@ <h3>Reject Task</h3>
             ensureAgent(d.agent_id, d.task_id, d.repo);
             agents[d.agent_id].stage = 'implementing';
             agents[d.agent_id].started = event.timestamp;
+            if (d.task_title) {
+                agents[d.agent_id].task_title = d.task_title;
+            }
             renderAgentCard(d.agent_id);
             appendLog('info', d.agent_id + ' started on ' + d.task_id);
         }
@@ -206,10 +550,16 @@ <h3>Reject Task</h3>
             ensureAgent(d.agent_id, d.task_id);
             agents[d.agent_id].stage = d.success ? 'finished' : 'failed';
             agents[d.agent_id].elapsed = d.elapsed_secs;
+            agents[d.agent_id].finished_at = Date.now();
             renderAgentCard(d.agent_id);
+            scheduleCollapse(d.agent_id);
             var status = d.success ? 'OK' : 'FAIL';
             appendLog(d.success ? 'info' : 'error',
                 d.agent_id + ' finished (' + status + ', ' + d.elapsed_secs.toFixed(1) + 's)');
+            // Refresh task table and status on agent completion
+            safeRefreshSection('task-table', 'refreshTasks');
+            safeRefreshSection('status-counts', 'refreshStatus');
+            debouncedActivityRefresh();
         }
         else if (kind.TaskStateChange) {
             var d = kind.TaskStateChange;
@@ -220,9 +570,15 @@ <h3>Reject Task</h3>
                 }
             }
             appendLog('info', d.task_id + ' (' + d.repo + '): ' + d.from + ' \u2192 ' + d.to);
-            // Refresh task table and status counts immediately on state change
-            htmx.trigger(document.getElementById('task-table'), 'refreshNow');
-            htmx.trigger(document.getElementById('status-counts'), 'refreshNow');
+            // Debounced refresh to batch rapid state changes
+            if (_taskRefreshTimer) clearTimeout(_taskRefreshTimer);
+            _taskRefreshTimer = setTimeout(function() {
+                _taskRefreshTimer = null;
+                safeRefreshSection('task-table', 'refreshTasks');
+                safeRefreshSection('status-counts', 'refreshStatus');
+                safeRefreshSection('traceability-section', 'refreshTraceability');
+            }, 500);
+            debouncedActivityRefresh();
         }
         else if (kind.GateStarted) {
             var d = kind.GateStarted;
@@ -240,6 +596,11 @@ <h3>Reject Task</h3>
             appendLog(d.passed ? 'info' : 'error',
                 d.task_id + ': gate ' + d.level + ' ' + status +
                 ' (' + d.duration_secs.toFixed(1) + 's)');
+            // Refresh task table, status counts, and traceability on gate completion
+            safeRefreshSection('task-table', 'refreshTasks');
+            safeRefreshSection('status-counts', 'refreshStatus');
+            safeRefreshSection('traceability-section', 'refreshTraceability');
+            debouncedActivityRefresh();
         }
         else if (kind.GateCheckFinished) {
             var d = kind.GateCheckFinished;
@@ -269,9 +630,49 @@ <h3>Reject Task</h3>
         }
         else if (kind.EngineLog) {
             var d = kind.EngineLog;
-            var level = d.level === 'Error' ? 'error' :
-                        d.level === 'Warn' ? 'warn' : 'info';
-            appendLog(level, d.message);
+            // Filter out generic infrastructure noise — only show pipeline-meaningful messages
+            if (isPipelineLogMessage(d.message)) {
+                var level = d.level === 'Error' ? 'error' :
+                            d.level === 'Warn' ? 'warn' : 'info';
+                appendLog(level, d.message);
+            }
+        }
+        // Sync events
+        else if (kind.SyncStarted) {
+            var d = kind.SyncStarted;
+            appendSyncLog('info', 'Sync started for ' + d.repo);
+        }
+        else if (kind.SyncCompleted) {
+            var d = kind.SyncCompleted;
+            appendSyncLog('info', 'Sync completed for ' + d.repo +
+                ': ' + d.branches_rebased + ' rebased, ' + d.branches_conflicted + ' conflicts');
+        }
+        else if (kind.BranchRebased) {
+            var d = kind.BranchRebased;
+            var status = d.success ? 'OK' : (d.had_conflicts ? 'CONFLICT' : 'FAIL');
+            appendSyncLog(d.success ? 'info' : 'warn',
+                d.repo + ': rebase ' + d.branch + ' -> ' + status);
+        }
+        else if (kind.RebaseAgentDispatched) {
+            var d = kind.RebaseAgentDispatched;
+            appendSyncLog('warn', d.repo + ': rebase agent dispatched for ' + d.branch);
+        }
+        else if (kind.SyncFailed) {
+            var d = kind.SyncFailed;
+            appendSyncLog('error', 'Sync failed for ' + d.repo + ': ' + d.error);
+        }
+        // Dashboard-originated events
+        else if (kind === 'BudgetUpdated' || kind.BudgetUpdated !== undefined) {
+            updateBudgetSurgically();
+            safeRefreshSection('budget-bar', 'refreshBudget');
+        }
+        else if (kind === 'MemoryUpdated' || kind.MemoryUpdated !== undefined) {
+            safeRefreshSection('memory-section', 'refreshMemory');
+        }
+        else if (kind === 'TaskDataChanged' || kind.TaskDataChanged !== undefined) {
+            safeRefreshSection('task-table', 'refreshTasks');
+            safeRefreshSection('status-counts', 'refreshStatus');
+            debouncedActivityRefresh();
         }
     }
 
@@ -281,13 +682,16 @@ <h3>Reject Task</h3>
             agents[agentId] = {
                 agent_id: agentId,
                 task_id: taskId || '?',
+                task_title: '',
                 repo: repo || '?',
                 stage: 'starting',
                 log: [],
                 files: null,
                 diff: null,
                 elapsed: null,
-                started: null
+                started: null,
+                finished_at: null,
+                collapse_timer: null
             };
             var placeholder = document.getElementById('no-agents');
             if (placeholder) placeholder.remove();
@@ -313,6 +717,54 @@ <h3>Reject Task</h3>
         }
     }
 
+    // ── Auto-collapse & elapsed helpers ─────────────────────────
+    var COLLAPSE_DELAY_MS = 60000;
+
+    function scheduleCollapse(agentId) {
+        var a = agents[agentId];
+        if (!a) return;
+        if (a.collapse_timer) clearTimeout(a.collapse_timer);
+        a.collapse_timer = setTimeout(function() {
+            var cardId = 'agent-' + cssId(agentId);
+            var card = document.getElementById(cardId);
+            if (card) card.classList.add('agent-card-collapsed');
+        }, COLLAPSE_DELAY_MS);
+    }
+
+    // Format the time elapsed since startTimestamp (anything Date can parse) as "Ns", "Nm Ns" or "Nh Nm".
+    function formatElapsed(startTimestamp) {
+        var start = new Date(startTimestamp).getTime();
+        // NaN (unparseable timestamp) and negative spans (start time ahead of the
+        // browser clock) both render as "0s" instead of "NaNh NaNm" / "-3s".
+        var secs = Math.max(0, Math.floor((Date.now() - start) / 1000)) || 0;
+        if (secs < 60) return secs + 's';
+        var mins = Math.floor(secs / 60);
+        if (mins < 60) return mins + 'm ' + (secs % 60) + 's';
+        var hrs = Math.floor(mins / 60);
+        return hrs + 'h ' + (mins % 60) + 'm';
+    }
+
+    // Numeric id from "TASK-0042"-style ids (case-insensitive) or a plain number; null if none.
+    function taskIdNumber(taskId) {
+        if (!taskId) return null;
+        var text = String(taskId);
+        var tagged = /TASK-0*(\d+)/i.exec(text);
+        var parsed = parseInt(tagged ? tagged[1] : text, 10);
+        return isNaN(parsed) ? null : parsed;
+    }
+
+    // Tick elapsed timers every second
+    setInterval(function() {
+        for (var aid in agents) {
+            var a = agents[aid];
+            if (a.started && !a.finished_at) {
+                var cardId = 'agent-' + cssId(aid);
+                var el = document.getElementById(cardId + '-elapsed');
+                if (el) el.textContent = formatElapsed(a.started);
+            }
+        }
+    }, 1000);
+
     // ── Agent Card Rendering ────────────────────────────────────
     function renderAgentCard(agentId) {
         var a = agents[agentId];
@@ -327,35 +779,47 @@ <h3>Reject Task</h3>
             grid.appendChild(card);
         }
 
+        var isCollapsed = card.classList.contains('agent-card-collapsed');
         card.textContent = '';
+        card.className = 'agent-card';
+        if (isCollapsed) card.classList.add('agent-card-collapsed');
+
         var stageClass = stageToClass(a.stage);
 
-        // Header
+        // Header with link to task detail
         var header = document.createElement('div');
         header.className = 'agent-header';
-        var title = document.createElement('div');
-        title.className = 'agent-title';
-        title.textContent = a.task_id;
+        var titleId = taskIdNumber(a.task_id);
+        var titleLink = document.createElement('a');
+        titleLink.className = 'agent-title';
+        titleLink.href = titleId !== null ? '/dashboard/tasks/' + titleId + '/review' : '#';
+        var titleText = String(a.task_id);
+        if (a.task_title) titleText += ': ' + a.task_title;
+        titleLink.textContent = titleText;
+        titleLink.title = titleText;
         var badge = document.createElement('span');
         badge.className = 'agent-badge ' + stageClass;
         badge.textContent = a.stage;
-        header.appendChild(title);
+        header.appendChild(titleLink);
         header.appendChild(badge);
         card.appendChild(header);
 
-        // Meta
+        // Meta: repo + elapsed timer
         var meta = document.createElement('div');
         meta.className = 'agent-meta';
         var repo = document.createElement('span');
         repo.className = 'agent-repo';
         repo.textContent = a.repo;
         meta.appendChild(repo);
-        if (a.elapsed) {
-            var elapsed = document.createElement('span');
-            elapsed.className = 'agent-elapsed';
+        var elapsed = document.createElement('span');
+        elapsed.className = 'agent-elapsed';
+        elapsed.id = cardId + '-elapsed';
+        if (a.finished_at && a.elapsed) {
             elapsed.textContent = a.elapsed.toFixed(1) + 's';
-            meta.appendChild(elapsed);
+        } else if (a.started) {
+            elapsed.textContent = formatElapsed(a.started);
         }
+        meta.appendChild(elapsed);
         card.appendChild(meta);
 
         // File stats
@@ -433,16 +897,12 @@ <h3>Reject Task</h3>
         log.scrollTop = log.scrollHeight;
     }
 
-    // ── Auto-clear action results after 5s ──────────────────────
+    // ── Add dismiss buttons to action results after swap ────────
     document.body.addEventListener('htmx:afterSwap', function(event) {
         if (event.detail.target.id === 'task-action-result') {
-            setTimeout(function() {
-                var el = document.getElementById('task-action-result');
-                if (el) el.innerHTML = '';
-            }, 5000);
-            // Also refresh the task table to reflect the change
-            htmx.trigger(document.getElementById('task-table'), 'refreshNow');
-            htmx.trigger(document.getElementById('status-counts'), 'refreshNow');
+            addDismissButton(event.detail.target);
+            safeRefreshSection('task-table', 'refreshTasks');
+            safeRefreshSection('status-counts', 'refreshStatus');
         }
     });
 
@@ -451,6 +911,92 @@ <h3>Reject Task</h3>
         return String(s).replace(/[^a-zA-Z0-9-]/g, '_');
     }
 
+    // Filter out generic infrastructure noise from EngineLog messages.
+    // Returns true for pipeline-meaningful messages (gate results, state changes,
+    // errors, budget events). Returns false for config loading, CLI invocations, etc.
+    var INFRA_NOISE_PATTERNS = [
+        'loaded pipeline config',
+        'loaded repos config',
+        'invoking claude cli',
+        'spawning subprocess',
+        'reading config',
+        'initializing',
+        'starting http server',
+        'listening on',
+        'connected to',
+        'loading plugin',
+        'registering handler',
+        'parsing',
+        'compiling',
+        'opening database',
+        'trace directory'
+    ];
+
+    // True unless the message contains (case-insensitively) an INFRA_NOISE_PATTERNS entry.
+    function isPipelineLogMessage(message) {
+        // Coerce first: a missing or non-string message must not throw a TypeError.
+        var lower = String(message == null ? '' : message).toLowerCase();
+        for (var i = 0; i < INFRA_NOISE_PATTERNS.length; i++) {
+            if (lower.indexOf(INFRA_NOISE_PATTERNS[i]) >= 0) return false;
+        }
+        return true;
+    }
+
+    // ── Sync Controls ───────────────────────────────────────────
+    // POST the repo named in #sync-repo to /api/v1/sync and log the outcome.
+    function triggerSync() {
+        var repo = document.getElementById('sync-repo').value.trim();
+        if (!repo) {
+            appendSyncLog('error', 'Please enter a repo name');
+            return;
+        }
+        appendSyncLog('info', 'Triggering sync for ' + repo + '...');
+        fetch('/api/v1/sync', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ repo: repo })
+        })
+        .then(function(r) {
+            // Report a non-JSON body (e.g. a proxy error page) by HTTP status, not as a parse error.
+            return r.json().catch(function() { throw new Error('HTTP ' + r.status + ' (non-JSON response)'); });
+        })
+        .then(function(data) {
+            appendSyncLog(data.success ? 'info' : 'error', data.message);
+        })
+        .catch(function(err) {
+            appendSyncLog('error', 'Request failed: ' + err.message);
+        });
+    }
+
+    // Append a timestamped line to #sync-log, keeping at most 100 entries and
+    // the view scrolled to the newest one.
+    function appendSyncLog(level, message) {
+        // Build one <span> of the entry; text goes in via textContent, never parsed as HTML.
+        function part(className, text) {
+            var span = document.createElement('span');
+            span.className = className;
+            span.textContent = text;
+            return span;
+        }
+
+        var container = document.getElementById('sync-log');
+        var stamp = new Date().toLocaleTimeString('en-GB', { hour12: false });
+
+        var row = document.createElement('div');
+        row.className = 'log-entry';
+        row.appendChild(part('log-time', stamp));
+        row.appendChild(part('log-level ' + level, level));
+        row.appendChild(part('log-message', message));
+        container.appendChild(row);
+
+        // Trim from the top once the log grows past 100 entries.
+        while (container.children.length > 100) {
+            container.removeChild(container.firstChild);
+        }
+
+        container.scrollTop = container.scrollHeight;
+    }
+
     function stageToClass(stage) {
         if (!stage) return '';
         var s = stage.toLowerCase();
diff --git a/crates/thrum-api/assets/favicon.svg b/crates/thrum-api/assets/favicon.svg
new file mode 100644
index 0000000..e74ec1b
--- /dev/null
+++ b/crates/thrum-api/assets/favicon.svg
@@ -0,0 +1,20 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
+  <defs>
+    <linearGradient id="g" gradientUnits="userSpaceOnUse" x1="0" y1="5" x2="0" y2="29">
+      <stop offset="0%" stop-color="#f0a040"/>
+      <stop offset="33%" stop-color="#40c8b0"/>
+      <stop offset="66%" stop-color="#a070f0"/>
+      <stop offset="100%" stop-color="#f06080"/>
+    </linearGradient>
+  </defs>
+  <rect width="32" height="32" rx="6" fill="#0f0e1a"/>
+  <!-- Woven threads -->
+  <path d="M6 8 Q16 4 26 8 Q16 12 6 8Z" fill="none" stroke="#f0a040" stroke-width="1.5" opacity="0.9"/>
+  <path d="M6 14 Q16 10 26 14 Q16 18 6 14Z" fill="none" stroke="#40c8b0" stroke-width="1.5" opacity="0.9"/>
+  <path d="M6 20 Q16 16 26 20 Q16 24 6 20Z" fill="none" stroke="#a070f0" stroke-width="1.5" opacity="0.9"/>
+  <path d="M6 26 Q16 22 26 26 Q16 30 6 26Z" fill="none" stroke="#f06080" stroke-width="1.5" opacity="0.9"/>
+  <!-- Vertical threads crossing; their bounding boxes have zero width, so gradient "g" uses userSpaceOnUse -->
+  <line x1="11" y1="5" x2="11" y2="29" stroke="url(#g)" stroke-width="1" opacity="0.4"/>
+  <line x1="16" y1="5" x2="16" y2="29" stroke="url(#g)" stroke-width="1" opacity="0.4"/>
+  <line x1="21" y1="5" x2="21" y2="29" stroke="url(#g)" stroke-width="1" opacity="0.4"/>
+</svg>
diff --git a/crates/thrum-api/assets/help.css b/crates/thrum-api/assets/help.css
new file mode 100644
index 0000000..f8ea211
--- /dev/null
+++ b/crates/thrum-api/assets/help.css
@@ -0,0 +1,401 @@
+/* Thrum Help Page — additional styles */
+
+.header-link {
+    color: var(--accent);
+    text-decoration: none;
+}
+
+.header-link:hover {
+    text-decoration: underline;
+}
+
+.back-link {
+    color: var(--accent);
+    text-decoration: none;
+    font-size: 12px;
+}
+
+.back-link:hover {
+    text-decoration: underline;
+}
+
+.help-intro {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 16px 20px;
+    margin-bottom: 24px;
+    font-size: 13px;
+    color: var(--text);
+    line-height: 1.7;
+}
+
+/* ── Sections ─────────────────────────── */
+
+.help-section {
+    margin-bottom: 32px;
+}
+
+.help-section h2 {
+    font-size: 15px;
+    font-weight: 600;
+    color: var(--accent);
+    text-transform: uppercase;
+    letter-spacing: 1.5px;
+    margin-bottom: 12px;
+    padding-bottom: 8px;
+    border-bottom: 1px solid var(--border);
+}
+
+.help-section p {
+    font-size: 13px;
+    color: var(--text);
+    margin-bottom: 12px;
+    line-height: 1.7;
+}
+
+.help-section ul {
+    list-style: none;
+    padding: 0;
+    margin-bottom: 12px;
+}
+
+.help-section ul li {
+    padding: 4px 0 4px 20px;
+    font-size: 13px;
+    position: relative;
+}
+
+.help-section ul li::before {
+    content: '\2022';
+    color: var(--accent);
+    position: absolute;
+    left: 4px;
+}
+
+.help-section code {
+    background: var(--surface-raised);
+    padding: 1px 6px;
+    border-radius: 3px;
+    font-size: 12px;
+    color: var(--cyan);
+}
+
+/* ── Diagram ──────────────────────────── */
+
+.diagram {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 16px 20px;
+    overflow-x: auto;
+}
+
+.diagram pre {
+    font-size: 12px;
+    line-height: 1.4;
+    color: var(--text);
+}
+
+/* ── Stage Cards ──────────────────────── */
+
+.stage-card {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 16px 20px;
+    margin-bottom: 12px;
+}
+
+.stage-header {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    margin-bottom: 10px;
+}
+
+.stage-header h3 {
+    font-size: 14px;
+    font-weight: 600;
+    color: var(--text);
+}
+
+.stage-abbr {
+    display: inline-block;
+    padding: 2px 8px;
+    font-size: 11px;
+    font-weight: 700;
+    border-radius: 4px;
+    background: var(--surface-raised);
+    color: var(--text-muted);
+    letter-spacing: 0.5px;
+    min-width: 28px;
+    text-align: center;
+}
+
+.stage-abbr.stage-active {
+    background: #1a2a3a;
+    color: var(--cyan);
+}
+
+.stage-abbr.stage-gate {
+    background: #2a2a1a;
+    color: var(--amber);
+}
+
+.stage-abbr.stage-review {
+    background: #2a2a1a;
+    color: var(--amber);
+}
+
+.stage-abbr.stage-approval {
+    background: #2a2a1a;
+    color: var(--amber);
+}
+
+.stage-abbr.stage-done {
+    background: #1a2a1a;
+    color: var(--green);
+}
+
+.stage-card p {
+    font-size: 13px;
+    color: var(--text);
+    line-height: 1.7;
+    margin-bottom: 8px;
+}
+
+.stage-card ul {
+    list-style: none;
+    padding: 0;
+    margin: 8px 0;
+}
+
+.stage-card ul li {
+    padding: 3px 0 3px 20px;
+    font-size: 13px;
+    position: relative;
+}
+
+.stage-card ul li::before {
+    content: '\2022';
+    color: var(--accent);
+    position: absolute;
+    left: 4px;
+}
+
+.stage-next {
+    font-size: 12px;
+    color: var(--text-muted);
+    margin-top: 8px;
+}
+
+.stage-next strong {
+    color: var(--green);
+}
+
+.stage-fail {
+    font-size: 12px;
+    color: var(--text-muted);
+    margin-top: 4px;
+}
+
+.stage-fail strong {
+    color: var(--red);
+}
+
+/* ── Retry Table ──────────────────────── */
+
+.retry-table {
+    margin: 12px 0;
+}
+
+.retry-table table {
+    width: 100%;
+    border-collapse: collapse;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    overflow: hidden;
+}
+
+.retry-table th,
+.retry-table td {
+    padding: 10px 14px;
+    text-align: left;
+    border-bottom: 1px solid var(--border);
+    font-size: 13px;
+}
+
+.retry-table th {
+    background: var(--surface-raised);
+    font-size: 11px;
+    color: var(--text-muted);
+    text-transform: uppercase;
+    letter-spacing: 1px;
+    font-weight: 600;
+}
+
+.retry-table tr:last-child td {
+    border-bottom: none;
+}
+
+/* ── Budget Features ──────────────────── */
+
+.budget-features li {
+    padding: 4px 0 4px 20px;
+}
+
+/* ── Status Reference Grid ────────────── */
+
+.status-ref-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
+    gap: 8px;
+    margin-top: 12px;
+}
+
+.status-ref-item {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    padding: 8px 12px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    font-size: 13px;
+}
+
+/* ── Timeline Reference ───────────────── */
+
+.timeline-ref {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    flex-wrap: wrap;
+    padding: 16px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    margin: 12px 0;
+}
+
+.timeline-ref-item {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    font-size: 12px;
+}
+
+.timeline-arrow {
+    color: var(--text-muted);
+    font-size: 14px;
+}
+
+.timeline-colors {
+    display: flex;
+    gap: 20px;
+    flex-wrap: wrap;
+    margin-top: 12px;
+    font-size: 12px;
+    color: var(--text-muted);
+}
+
+.timeline-colors div {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+}
+
+/* ── Pipeline Legend (main dashboard) ──── */
+
+.pipeline-legend {
+    margin-bottom: 16px;
+}
+
+.pipeline-legend summary {
+    cursor: pointer;
+    font-size: 12px;
+    color: var(--text-muted);
+    padding: 8px 14px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    list-style: none;
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    user-select: none;
+}
+
+.pipeline-legend summary::-webkit-details-marker {
+    display: none;
+}
+
+.pipeline-legend summary::before {
+    content: '\25B6';
+    font-size: 8px;
+    transition: transform 0.2s;
+    color: var(--accent);
+}
+
+.pipeline-legend[open] summary::before {
+    transform: rotate(90deg);
+}
+
+.pipeline-legend[open] summary {
+    border-radius: 8px 8px 0 0;
+    border-bottom-color: transparent;
+}
+
+.legend-content {
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-top: none;
+    border-radius: 0 0 8px 8px;
+    padding: 14px 16px;
+}
+
+.legend-flow {
+    display: flex;
+    align-items: center;
+    gap: 4px;
+    flex-wrap: wrap;
+    margin-bottom: 10px;
+}
+
+.legend-flow .timeline-step {
+    cursor: help;
+}
+
+.legend-flow .flow-arrow {
+    color: var(--text-muted);
+    font-size: 10px;
+}
+
+.legend-colors {
+    display: flex;
+    gap: 16px;
+    flex-wrap: wrap;
+    font-size: 11px;
+    color: var(--text-muted);
+    padding-top: 8px;
+    border-top: 1px solid var(--border);
+}
+
+.legend-colors div {
+    display: flex;
+    align-items: center;
+    gap: 4px;
+}
+
+.legend-help-link {
+    margin-left: auto;
+    color: var(--accent);
+    text-decoration: none;
+    font-size: 11px;
+}
+
+.legend-help-link:hover {
+    text-decoration: underline;
+}
diff --git a/crates/thrum-api/assets/help.html b/crates/thrum-api/assets/help.html
new file mode 100644
index 0000000..588ad72
--- /dev/null
+++ b/crates/thrum-api/assets/help.html
@@ -0,0 +1,376 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Thrum — Pipeline Reference</title>
+    <link rel="stylesheet" href="/dashboard/assets/style.css">
+    <link rel="stylesheet" href="/dashboard/assets/help.css">
+</head>
+<body>
+    <div class="container">
+        <header>
+            <h1><a href="/dashboard" class="header-link">thrum</a></h1>
+            <div class="version">
+                <a href="/dashboard" class="back-link">&larr; dashboard</a>
+                pipeline reference
+            </div>
+        </header>
+
+        <div class="help-intro">
+            <p>Thrum is an orchestration engine for autonomous AI-driven development.
+            Tasks move through a gated pipeline with configurable quality, proof, and
+            integration checks. This page documents every stage, gate, and mechanism.</p>
+        </div>
+
+        <!-- ── State Machine Diagram ───────────────────── -->
+        <div class="help-section" id="state-machine">
+            <h2>State Machine</h2>
+            <p>Every task follows this pipeline from creation to merge. Failed gates
+            and human rejections cycle the task back to Implementing for retry.</p>
+            <div class="diagram">
+<pre>
+  ┌─────────┐    ┌──────────────┐    ┌──────────────┐    ┌───────────┐
+  │ Pending │───▶│ Implementing │───▶│ Gate 1:      │───▶│ Reviewing │
+  └─────────┘    └──────────────┘    │ Quality      │    └───────────┘
+                       ▲             └──────────────┘         │
+                       │                   │ fail             │
+                       │◄──────────────────┘                  ▼
+                       │                              ┌──────────────┐
+                       │                              │ Gate 2:      │
+                       │                              │ Proof        │
+                       │                              └──────────────┘
+                       │                                    │ fail │
+                       │◄───────────────────────────────────┘      │
+                       │                                           ▼
+                       │         ┌──────────┐           ┌────────────────┐
+                       │◄────────│ Rejected │◄──────────│   Awaiting     │
+                       │         └──────────┘    reject  │   Approval     │
+                       │                                 └────────────────┘
+                       │                                        │ approve
+                       │                                        ▼
+                       │                              ┌──────────────┐
+                       │                              │ Integrating  │
+                       │                              └──────────────┘
+                       │                                    │ fail │
+                       │◄───────────────────────────────────┘      │
+                       │                                           ▼
+                       │                                 ┌──────────────┐
+                       │                                 │ Awaiting CI  │
+                       │                                 └──────────────┘
+                       │                                       │ fail │
+                       │◄──────────────────────────────────────┘      │
+                                                                      ▼
+                                                              ┌────────────┐
+                                                              │   Merged   │
+                                                              └────────────┘
+</pre>
+            </div>
+        </div>
+
+        <!-- ── Pipeline Stages ─────────────────────────── -->
+        <div class="help-section" id="stages">
+            <h2>Pipeline Stages</h2>
+
+            <div class="stage-card" id="pending">
+                <div class="stage-header">
+                    <span class="stage-abbr">P</span>
+                    <h3>Pending</h3>
+                </div>
+                <p>Task is queued and waiting for an available agent to pick it up.
+                Tasks are dispatched in priority order — the engine selects the
+                highest-priority pending task that fits within the remaining budget.</p>
+                <div class="stage-next">Next: <strong>Implementing</strong> (when an agent claims the task)</div>
+            </div>
+
+            <div class="stage-card" id="implementing">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-active">I</span>
+                    <h3>Implementing</h3>
+                </div>
+                <p>An AI agent is actively writing code on a dedicated branch.
+                The agent receives the task description, acceptance criteria,
+                the target repo's CLAUDE.md conventions, and any memory context
+                from previous attempts. Implementation happens in an isolated
+                git worktree to avoid conflicts.</p>
+                <div class="stage-next">Next: <strong>Gate 1: Quality</strong> (automatic on agent completion)</div>
+            </div>
+
+            <div class="stage-card" id="gate1">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-gate">G1</span>
+                    <h3>Gate 1: Quality</h3>
+                </div>
+                <p>Automated quality checks run against the task branch. These are
+                configurable per-repo but typically include:</p>
+                <ul>
+                    <li><strong>cargo fmt --check</strong> — formatting compliance</li>
+                    <li><strong>cargo clippy</strong> — lint and static analysis</li>
+                    <li><strong>cargo test</strong> — unit and integration tests</li>
+                </ul>
+                <p>All checks must pass for the gate to open. If any check fails,
+                the task cycles back to <strong>Implementing</strong> for retry.</p>
+                <div class="stage-fail">On failure: returns to <strong>Implementing</strong> (retry count incremented)</div>
+                <div class="stage-next">On pass: <strong>Reviewing</strong></div>
+            </div>
+
+            <div class="stage-card" id="reviewing">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-review">R</span>
+                    <h3>Reviewing</h3>
+                </div>
+                <p>A separate AI reviewer agent examines the implementation for
+                correctness, security, and adherence to requirements. The reviewer
+                produces a structured analysis including a diff summary, acceptance
+                criteria mapping, and a recommendation.</p>
+                <div class="stage-next">Next: <strong>Gate 2: Proof</strong> (automatic on review completion)</div>
+            </div>
+
+            <div class="stage-card" id="gate2">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-gate">G2</span>
+                    <h3>Gate 2: Proof</h3>
+                </div>
+                <p>Formal verification checks for mathematical correctness. These
+                are opt-in and typically include:</p>
+                <ul>
+                    <li><strong>Z3 SMT solver</strong> — automated theorem proving</li>
+                    <li><strong>Rocq (Coq) proofs</strong> — interactive proof verification</li>
+                </ul>
+                <p>If no proof checks are configured, this gate passes automatically.</p>
+                <div class="stage-fail">On failure: returns to <strong>Implementing</strong> (retry count incremented)</div>
+                <div class="stage-next">On pass: <strong>Awaiting Approval</strong></div>
+            </div>
+
+            <div class="stage-card" id="approval">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-approval">A</span>
+                    <h3>Awaiting Approval</h3>
+                </div>
+                <p>The task has passed all automated gates and is waiting for a human
+                to review and approve it. The dashboard provides a full review page
+                with the diff, acceptance criteria, gate reports, and reviewer output.</p>
+                <ul>
+                    <li><strong>Approve</strong> — moves the task to Integrating</li>
+                    <li><strong>Reject</strong> — returns to Implementing with feedback for the agent</li>
+                </ul>
+                <div class="stage-next">On approve: <strong>Integrating</strong></div>
+                <div class="stage-fail">On reject: returns to <strong>Implementing</strong> with feedback</div>
+            </div>
+
+            <div class="stage-card" id="integrating">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-active">Int</span>
+                    <h3>Integrating</h3>
+                </div>
+                <p>The approved changes are being merged into the target branch.
+                The engine performs a git merge (or rebase) from the task branch
+                into the main branch. If configured, a PR is created and pushed
+                to the remote.</p>
+                <div class="stage-fail">On failure: returns to <strong>Implementing</strong> (merge conflicts)</div>
+                <div class="stage-next">On success: <strong>Awaiting CI</strong> or <strong>Merged</strong></div>
+            </div>
+
+            <div class="stage-card" id="ci">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-active">CI</span>
+                    <h3>Awaiting CI</h3>
+                </div>
+                <p>A pull request has been created and pushed. The engine polls
+                the CI pipeline status. If CI passes, the task moves to Merged.
+                If CI fails, the task enters the CI Failed status for human review.</p>
+                <div class="stage-fail">On failure: <strong>CI Failed</strong> (needs human review or retry)</div>
+                <div class="stage-next">On pass: <strong>Merged</strong></div>
+            </div>
+
+            <div class="stage-card" id="merged">
+                <div class="stage-header">
+                    <span class="stage-abbr stage-done">M</span>
+                    <h3>Merged</h3>
+                </div>
+                <p>The task is complete. All changes have been merged into the main
+                branch and (if configured) the PR has been merged. This is the
+                terminal state — no further transitions are possible.</p>
+            </div>
+        </div>
+
+        <!-- ── Retry Logic ─────────────────────────────── -->
+        <div class="help-section" id="retry-logic">
+            <h2>Retry Logic</h2>
+            <p>When a task fails a gate or is rejected by a human, it cycles back
+            to <strong>Implementing</strong> for another attempt. The engine tracks
+            retries with an escalating strategy:</p>
+            <div class="retry-table">
+                <table>
+                    <thead>
+                        <tr>
+                            <th>Retry</th>
+                            <th>Strategy</th>
+                            <th>Description</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td>1–3</td>
+                            <td><span class="badge badge-normal">normal</span></td>
+                            <td>Standard retry with gate failure feedback</td>
+                        </tr>
+                        <tr>
+                            <td>4–6</td>
+                            <td><span class="badge badge-expanded-context">expanded-context</span></td>
+                            <td>Agent receives additional context and memory entries</td>
+                        </tr>
+                        <tr>
+                            <td>7–9</td>
+                            <td><span class="badge badge-different-approach">different-approach</span></td>
+                            <td>Agent is instructed to try a fundamentally different approach</td>
+                        </tr>
+                        <tr>
+                            <td>10</td>
+                            <td><span class="badge badge-human-review">human-review</span></td>
+                            <td>Maximum retries reached; task requires human intervention</td>
+                        </tr>
+                    </tbody>
+                </table>
+            </div>
+            <p>The convergence tracker detects repeated failure patterns (same error
+            signature across attempts) and escalates the strategy earlier when the
+            task appears stuck in a loop.</p>
+        </div>
+
+        <!-- ── Budget Model ────────────────────────────── -->
+        <div class="help-section" id="budget">
+            <h2>Budget Model</h2>
+            <p>Thrum tracks API token usage to prevent runaway spending. The budget
+            system provides:</p>
+            <ul class="budget-features">
+                <li><strong>Daily budget cap</strong> — maximum tokens allowed per 24-hour period</li>
+                <li><strong>Per-task budget</strong> — individual task spending limits</li>
+                <li><strong>Usage tracking</strong> — real-time token consumption monitoring</li>
+                <li><strong>Budget bar</strong> — visual indicator on the dashboard header</li>
+            </ul>
+            <p>When the daily budget is exhausted, the engine pauses task dispatching
+            until the budget window resets. Budget configuration lives in
+            <code>configs/pipeline.toml</code> under the <code>[budget]</code> section.</p>
+        </div>
+
+        <!-- ── Status Reference ────────────────────────── -->
+        <div class="help-section" id="status-reference">
+            <h2>Status Reference</h2>
+            <p>Quick reference for all task statuses and their badge colors:</p>
+            <div class="status-ref-grid">
+                <div class="status-ref-item">
+                    <span class="badge badge-pending">pending</span>
+                    <span>Queued for processing</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-implementing">implementing</span>
+                    <span>Agent is writing code</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-gate1-failed">gate1-failed</span>
+                    <span>Quality checks failed</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-reviewing">reviewing</span>
+                    <span>AI reviewer analyzing code</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-gate2-failed">gate2-failed</span>
+                    <span>Proof checks failed</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-awaiting-approval">awaiting-approval</span>
+                    <span>Needs human approval</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-approved">approved</span>
+                    <span>Human approved</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-rejected">rejected</span>
+                    <span>Human rejected</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-integrating">integrating</span>
+                    <span>Merging into target branch</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-gate3-failed">gate3-failed</span>
+                    <span>Integration failed</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-awaiting-ci">awaiting-ci</span>
+                    <span>Waiting for CI to pass</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-ci-failed">ci-failed</span>
+                    <span>CI pipeline failed</span>
+                </div>
+                <div class="status-ref-item">
+                    <span class="badge badge-merged">merged</span>
+                    <span>Complete and merged</span>
+                </div>
+            </div>
+        </div>
+
+        <!-- ── Timeline Key ────────────────────────────── -->
+        <div class="help-section" id="timeline-key">
+            <h2>Timeline Key</h2>
+            <p>The inline timeline in the task table uses these abbreviations:</p>
+            <div class="timeline-ref">
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">P</span>
+                    <span>Pending</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step active">I</span>
+                    <span>Implementing</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">G1</span>
+                    <span>Gate 1</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">R</span>
+                    <span>Reviewing</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">G2</span>
+                    <span>Gate 2</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">A</span>
+                    <span>Approval</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">Int</span>
+                    <span>Integrating</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step">CI</span>
+                    <span>CI</span>
+                </div>
+                <span class="timeline-arrow">&rarr;</span>
+                <div class="timeline-ref-item">
+                    <span class="timeline-step done">M</span>
+                    <span>Merged</span>
+                </div>
+            </div>
+            <div class="timeline-colors">
+                <div><span class="timeline-step">P</span> Default (not reached)</div>
+                <div><span class="timeline-step done">P</span> Done (completed)</div>
+                <div><span class="timeline-step active">I</span> Active (current stage)</div>
+                <div><span class="timeline-step failed">G1</span> Failed (gate/rejection)</div>
+            </div>
+        </div>
+    </div>
+</body>
+</html>
diff --git a/crates/thrum-api/assets/live.css b/crates/thrum-api/assets/live.css
index 865ed78..f401267 100644
--- a/crates/thrum-api/assets/live.css
+++ b/crates/thrum-api/assets/live.css
@@ -40,6 +40,23 @@
     display: flex;
     flex-direction: column;
     gap: 8px;
+    transition: opacity 0.6s ease, max-height 0.6s ease;
+    max-height: 600px;
+    overflow: hidden;
+}
+
+.agent-card.agent-card-collapsed {
+    opacity: 0.4;
+    max-height: 52px;
+    padding: 10px 14px;
+    gap: 0;
+    cursor: pointer;
+}
+
+.agent-card.agent-card-collapsed .agent-log,
+.agent-card.agent-card-collapsed .agent-files,
+.agent-card.agent-card-collapsed .agent-meta {
+    display: none;
 }
 
 .agent-header {
@@ -56,6 +73,12 @@
     text-overflow: ellipsis;
     white-space: nowrap;
     max-width: 70%;
+    text-decoration: none;
+}
+
+.agent-title:hover {
+    text-decoration: underline;
+    opacity: 0.85;
 }
 
 .agent-badge {
diff --git a/crates/thrum-api/assets/live.html b/crates/thrum-api/assets/live.html
index a4df4fd..a08531c 100644
--- a/crates/thrum-api/assets/live.html
+++ b/crates/thrum-api/assets/live.html
@@ -40,20 +40,21 @@ <h2>Event Stream</h2>
 
     <script>
     // ── State ────────────────────────────────────────────────────
-    const agents = {};
-    const MAX_LOG_LINES = 200;
-    const MAX_AGENT_LOG = 50;
+    var agents = {};
+    var MAX_LOG_LINES = 200;
+    var MAX_AGENT_LOG = 50;
+    var COLLAPSE_DELAY_MS = 60000;
 
     // ── SSE Connection ──────────────────────────────────────────
-    const evtSource = new EventSource('/api/v1/events/stream');
+    var evtSource = new EventSource('/api/v1/events/stream');
 
     evtSource.addEventListener('pipeline_event', function(e) {
-        const event = JSON.parse(e.data);
+        var event = JSON.parse(e.data);
         handleEvent(event);
     });
 
     evtSource.addEventListener('lagged', function(e) {
-        const info = JSON.parse(e.data);
+        var info = JSON.parse(e.data);
         appendLog('warn', 'Skipped ' + info.skipped + ' events (client lagged)');
     });
 
@@ -67,71 +68,109 @@ <h2>Event Stream</h2>
         document.getElementById('conn-dot').classList.add('disconnected');
     };
 
+    // ── Elapsed Time Timer ──────────────────────────────────────
+    setInterval(function() {
+        // Once a second, refresh the timer of each agent that has started but not finished.
+        Object.keys(agents).forEach(function(agentId) {
+            var agent = agents[agentId];
+            if (!agent.started || agent.finished_at) return;
+            updateElapsedDisplay(agentId);
+        });
+    }, 1000);
+
+    // Write the current elapsed time into the card's "<cardId>-elapsed" element.
+    // Does nothing when the agent is unknown, has no start time, or has no such element.
+    function updateElapsedDisplay(agentId) {
+        var agent = agents[agentId];
+        if (!agent || !agent.started) return;
+        var label = document.getElementById('agent-' + cssId(agentId) + '-elapsed');
+        if (!label) return;
+        label.textContent = formatElapsed(agent.started);
+    }
+
+    // Render the time since startTimestamp (anything `new Date()` accepts) as
+    // "42s", "3m 7s" or "1h 5m". Returns '' when the timestamp cannot be parsed.
+    function formatElapsed(startTimestamp) {
+        var start = new Date(startTimestamp).getTime();
+        if (isNaN(start)) return '';  // previously rendered "NaNh NaNm"
+        // Clamp at zero: a start time ahead of the local clock gave negative output.
+        var secs = Math.max(0, Math.floor((Date.now() - start) / 1000));
+        if (secs < 60) return secs + 's';
+        var mins = Math.floor(secs / 60);
+        if (mins < 60) return mins + 'm ' + (secs % 60) + 's';
+        return Math.floor(mins / 60) + 'h ' + (mins % 60) + 'm';
+    }
+
     // ── Event Router ────────────────────────────────────────────
     function handleEvent(event) {
-        const kind = event.kind;
+        var kind = event.kind;
 
         if (kind.AgentStarted) {
-            const d = kind.AgentStarted;
+            var d = kind.AgentStarted;
             ensureAgent(d.agent_id, d.task_id, d.repo);
             agents[d.agent_id].stage = 'implementing';
             agents[d.agent_id].started = event.timestamp;
+            if (d.task_title) {
+                agents[d.agent_id].task_title = d.task_title;
+            }
             renderAgentCard(d.agent_id);
             appendLog('info', d.agent_id + ' started on ' + d.task_id);
         }
         else if (kind.AgentOutput) {
-            const d = kind.AgentOutput;
+            var d = kind.AgentOutput;
             ensureAgent(d.agent_id, d.task_id);
             pushAgentLog(d.agent_id, d.stream, d.line);
             renderAgentCard(d.agent_id);
         }
         else if (kind.AgentFinished) {
-            const d = kind.AgentFinished;
+            var d = kind.AgentFinished;
             ensureAgent(d.agent_id, d.task_id);
             agents[d.agent_id].stage = d.success ? 'finished' : 'failed';
             agents[d.agent_id].elapsed = d.elapsed_secs;
+            agents[d.agent_id].finished_at = Date.now();
             renderAgentCard(d.agent_id);
-            const status = d.success ? 'OK' : 'FAIL';
+            scheduleCollapse(d.agent_id);
+            var status = d.success ? 'OK' : 'FAIL';
             appendLog(d.success ? 'info' : 'error',
                 d.agent_id + ' finished (' + status + ', ' + d.elapsed_secs.toFixed(1) + 's)');
         }
         else if (kind.TaskStateChange) {
-            const d = kind.TaskStateChange;
-            for (const [aid, a] of Object.entries(agents)) {
-                if (a.task_id === d.task_id) {
-                    a.stage = d.to;
+            var d = kind.TaskStateChange;
+            for (var aid in agents) {
+                if (agents[aid].task_id === d.task_id) {
+                    agents[aid].stage = d.to;
                     renderAgentCard(aid);
                 }
             }
             appendLog('info', d.task_id + ' (' + d.repo + '): ' + d.from + ' \u2192 ' + d.to);
         }
         else if (kind.GateStarted) {
-            const d = kind.GateStarted;
-            for (const [aid, a] of Object.entries(agents)) {
-                if (a.task_id === d.task_id) {
-                    a.stage = 'gate: ' + d.level;
+            var d = kind.GateStarted;
+            for (var aid in agents) {
+                if (agents[aid].task_id === d.task_id) {
+                    agents[aid].stage = 'gate: ' + d.level;
                     renderAgentCard(aid);
                 }
             }
             appendLog('info', d.task_id + ': gate ' + d.level + ' started');
         }
         else if (kind.GateFinished) {
-            const d = kind.GateFinished;
-            const status = d.passed ? 'PASS' : 'FAIL';
+            var d = kind.GateFinished;
+            var status = d.passed ? 'PASS' : 'FAIL';
             appendLog(d.passed ? 'info' : 'error',
                 d.task_id + ': gate ' + d.level + ' ' + status +
                 ' (' + d.duration_secs.toFixed(1) + 's)');
         }
         else if (kind.GateCheckFinished) {
-            const d = kind.GateCheckFinished;
-            const status = d.passed ? 'PASS' : 'FAIL';
+            var d = kind.GateCheckFinished;
+            var status = d.passed ? 'PASS' : 'FAIL';
             appendLog(d.passed ? 'info' : 'warn',
                 'gate/' + d.check_name + ': ' + status);
         }
         else if (kind.FileChanged) {
-            const d = kind.FileChanged;
+            var d = kind.FileChanged;
             ensureAgent(d.agent_id, d.task_id);
-            const a = agents[d.agent_id];
+            var a = agents[d.agent_id];
             if (!a.files) a.files = { created: 0, modified: 0, deleted: 0 };
             if (d.kind === 'Created') a.files.created++;
             else if (d.kind === 'Modified') a.files.modified++;
@@ -139,7 +178,7 @@ <h2>Event Stream</h2>
             renderAgentCard(d.agent_id);
         }
         else if (kind.DiffUpdate) {
-            const d = kind.DiffUpdate;
+            var d = kind.DiffUpdate;
             ensureAgent(d.agent_id, d.task_id);
             agents[d.agent_id].diff = {
                 files: d.files_changed,
@@ -149,8 +188,8 @@ <h2>Event Stream</h2>
             renderAgentCard(d.agent_id);
         }
         else if (kind.EngineLog) {
-            const d = kind.EngineLog;
-            const level = d.level === 'Error' ? 'error' :
+            var d = kind.EngineLog;
+            var level = d.level === 'Error' ? 'error' :
                           d.level === 'Warn' ? 'warn' : 'info';
             appendLog(level, d.message);
         }
@@ -162,13 +201,16 @@ <h2>Event Stream</h2>
             agents[agentId] = {
                 agent_id: agentId,
                 task_id: taskId || '?',
+                task_title: '',
                 repo: repo || '?',
                 stage: 'starting',
                 log: [],
                 files: null,
                 diff: null,
                 elapsed: null,
-                started: null
+                started: null,
+                finished_at: null,
+                collapse_timer: null
             };
             var placeholder = document.getElementById('no-agents');
             if (placeholder) placeholder.remove();
@@ -178,22 +220,51 @@ <h2>Event Stream</h2>
     }
 
     function pushAgentLog(agentId, stream, line) {
-        const a = agents[agentId];
+        var a = agents[agentId];
         if (!a) return;
-        const tag = stream === 'Stderr' ? 'err' : 'out';
+        var tag = stream === 'Stderr' ? 'err' : 'out';
         a.log.push({ tag: tag, line: line });
         if (a.log.length > MAX_AGENT_LOG) {
             a.log = a.log.slice(-MAX_AGENT_LOG);
         }
     }
 
+    // ── Auto-collapse finished cards ────────────────────────────
+    // Arm a one-shot timer that collapses the agent's card after COLLAPSE_DELAY_MS,
+    // replacing any timer already pending for that agent.
+    function scheduleCollapse(agentId) {
+        var agent = agents[agentId];
+        if (!agent) return;
+        if (agent.collapse_timer) clearTimeout(agent.collapse_timer);
+        agent.collapse_timer = setTimeout(collapseCard.bind(null, agentId), COLLAPSE_DELAY_MS);
+    }
+
+    // Mark the agent's card as collapsed by adding the 'agent-card-collapsed' class.
+    // Silently does nothing when no card element exists for this agent.
+    function collapseCard(agentId) {
+        var card = document.getElementById('agent-' + cssId(agentId));
+        if (!card) return;
+        card.classList.add('agent-card-collapsed');
+    }
+
+    // ── Task ID extraction ──────────────────────────────────────
+    // Numeric part of a task id ("TASK-0042" -> 42, 42 -> 42); null when none can be parsed.
+    function taskIdNumber(taskId) {
+        if (!taskId) return null;
+        var text = String(taskId);
+        var prefixed = /TASK-0*(\d+)/i.exec(text);
+        var parsed = parseInt(prefixed ? prefixed[1] : text, 10);
+        if (isNaN(parsed)) return null;
+        return parsed;
+    }
+
     // ── Rendering (safe DOM construction) ───────────────────────
     function renderAgentCard(agentId) {
-        const a = agents[agentId];
+        var a = agents[agentId];
         if (!a) return;
-        const grid = document.getElementById('agent-grid');
-        const cardId = 'agent-' + cssId(agentId);
-        let card = document.getElementById(cardId);
+        var grid = document.getElementById('agent-grid');
+        var cardId = 'agent-' + cssId(agentId);
+        var card = document.getElementById(cardId);
         if (!card) {
             card = document.createElement('div');
             card.id = cardId;
@@ -201,49 +272,75 @@ <h2>Event Stream</h2>
             grid.appendChild(card);
         }
 
+        // Preserve collapsed state
+        var isCollapsed = card.classList.contains('agent-card-collapsed');
+
         // Clear and rebuild using safe DOM methods
         card.textContent = '';
+        card.className = 'agent-card';
+        if (isCollapsed) card.classList.add('agent-card-collapsed');
 
-        const stageClass = stageToClass(a.stage);
+        var stageClass = stageToClass(a.stage);
 
         // Header row: task title + stage badge
-        const header = document.createElement('div');
+        var header = document.createElement('div');
         header.className = 'agent-header';
-        const title = document.createElement('div');
-        title.className = 'agent-title';
-        title.textContent = a.task_id;
-        const badge = document.createElement('span');
+
+        // Task title with link to detail page
+        var titleId = taskIdNumber(a.task_id);
+        var titleLink = document.createElement('a');
+        titleLink.className = 'agent-title';
+        if (titleId !== null) {
+            titleLink.href = '/dashboard/tasks/' + titleId + '/review';
+        } else {
+            titleLink.href = '#';
+        }
+        // Show task title alongside task ID
+        var titleText = String(a.task_id);
+        if (a.task_title) {
+            titleText += ': ' + a.task_title;
+        }
+        titleLink.textContent = titleText;
+        titleLink.title = titleText;
+
+        var badge = document.createElement('span');
         badge.className = 'agent-badge ' + stageClass;
         badge.textContent = a.stage;
-        header.appendChild(title);
+        header.appendChild(titleLink);
         header.appendChild(badge);
         card.appendChild(header);
 
-        // Meta row: repo + elapsed
-        const meta = document.createElement('div');
+        // Meta row: repo + elapsed timer
+        var meta = document.createElement('div');
         meta.className = 'agent-meta';
-        const repo = document.createElement('span');
+        var repo = document.createElement('span');
         repo.className = 'agent-repo';
         repo.textContent = a.repo;
         meta.appendChild(repo);
-        if (a.elapsed) {
-            const elapsed = document.createElement('span');
-            elapsed.className = 'agent-elapsed';
+
+        var elapsed = document.createElement('span');
+        elapsed.className = 'agent-elapsed';
+        elapsed.id = cardId + '-elapsed';
+        if (a.finished_at && a.elapsed) {
+            // Finished: show final static elapsed time
             elapsed.textContent = a.elapsed.toFixed(1) + 's';
-            meta.appendChild(elapsed);
+        } else if (a.started) {
+            // Active: show live ticking timer
+            elapsed.textContent = formatElapsed(a.started);
         }
+        meta.appendChild(elapsed);
         card.appendChild(meta);
 
         // File stats
         if (a.diff) {
-            const filesDiv = document.createElement('div');
+            var filesDiv = document.createElement('div');
             filesDiv.className = 'agent-files';
             addFileStat(filesDiv, a.diff.files + ' files', '');
             addFileStat(filesDiv, '+' + a.diff.ins, 'ins');
             addFileStat(filesDiv, '-' + a.diff.del, 'del');
             card.appendChild(filesDiv);
         } else if (a.files) {
-            const filesDiv = document.createElement('div');
+            var filesDiv = document.createElement('div');
             filesDiv.className = 'agent-files';
             if (a.files.created) addFileStat(filesDiv, '+' + a.files.created, '');
             if (a.files.modified) addFileStat(filesDiv, '~' + a.files.modified, '');
@@ -251,20 +348,20 @@ <h2>Event Stream</h2>
             card.appendChild(filesDiv);
         }
 
-        // Scrollable log
+        // Scrollable log (hidden when collapsed)
         if (a.log.length > 0) {
-            const logDiv = document.createElement('div');
+            var logDiv = document.createElement('div');
             logDiv.className = 'agent-log';
-            const visible = a.log.slice(-20);
+            var visible = a.log.slice(-20);
             for (var i = 0; i < visible.length; i++) {
                 var l = visible[i];
-                const lineEl = document.createElement('div');
+                var lineEl = document.createElement('div');
                 lineEl.className = 'agent-log-line';
-                const tagSpan = document.createElement('span');
+                var tagSpan = document.createElement('span');
                 tagSpan.className = 'stream-tag ' + l.tag;
                 tagSpan.textContent = l.tag;
                 lineEl.appendChild(tagSpan);
-                const textNode = document.createTextNode(l.line);
+                var textNode = document.createTextNode(l.line);
                 lineEl.appendChild(textNode);
                 logDiv.appendChild(lineEl);
             }
@@ -274,7 +371,7 @@ <h2>Event Stream</h2>
     }
 
     function addFileStat(parent, text, extraClass) {
-        const span = document.createElement('span');
+        var span = document.createElement('span');
         span.className = 'file-stat' + (extraClass ? ' ' + extraClass : '');
         span.textContent = text;
         parent.appendChild(span);
@@ -282,23 +379,23 @@ <h2>Event Stream</h2>
 
     // ── Global Event Log (safe DOM construction) ────────────────
     function appendLog(level, message) {
-        const log = document.getElementById('event-log');
-        const now = new Date().toLocaleTimeString('en-GB', { hour12: false });
+        var log = document.getElementById('event-log');
+        var now = new Date().toLocaleTimeString('en-GB', { hour12: false });
 
-        const entry = document.createElement('div');
+        var entry = document.createElement('div');
         entry.className = 'log-entry';
 
-        const timeSpan = document.createElement('span');
+        var timeSpan = document.createElement('span');
         timeSpan.className = 'log-time';
         timeSpan.textContent = now;
         entry.appendChild(timeSpan);
 
-        const levelSpan = document.createElement('span');
+        var levelSpan = document.createElement('span');
         levelSpan.className = 'log-level ' + level;
         levelSpan.textContent = level;
         entry.appendChild(levelSpan);
 
-        const msgSpan = document.createElement('span');
+        var msgSpan = document.createElement('span');
         msgSpan.className = 'log-message';
         msgSpan.textContent = message;
         entry.appendChild(msgSpan);
diff --git a/crates/thrum-api/assets/review.css b/crates/thrum-api/assets/review.css
index ef4b50a..3944aab 100644
--- a/crates/thrum-api/assets/review.css
+++ b/crates/thrum-api/assets/review.css
@@ -483,3 +483,17 @@
     color: var(--red);
     border: 1px solid var(--red);
 }
+
+.action-result.warning {
+    background: rgba(255, 165, 0, 0.12);
+    color: var(--amber, #ffa500);
+    border: 1px solid rgba(255, 165, 0, 0.5);
+    text-align: left;
+}
+
+.action-result.warning code {
+    background: rgba(255,165,0,0.15);
+    padding: 1px 4px;
+    border-radius: 3px;
+    font-size: 12px;
+}
diff --git a/crates/thrum-api/assets/style.css b/crates/thrum-api/assets/style.css
index 54aee39..85548f6 100644
--- a/crates/thrum-api/assets/style.css
+++ b/crates/thrum-api/assets/style.css
@@ -57,6 +57,28 @@ header .version {
     gap: 8px;
 }
 
+.header-help-link {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    width: 18px;
+    height: 18px;
+    border-radius: 50%;
+    background: var(--surface-raised);
+    border: 1px solid var(--border);
+    color: var(--accent);
+    font-size: 11px;
+    font-weight: 700;
+    text-decoration: none;
+    margin-left: 4px;
+}
+
+.header-help-link:hover {
+    background: var(--accent);
+    color: var(--bg);
+    border-color: var(--accent);
+}
+
 /* ── Connection Indicator ─────────────────── */
 
 .connection-dot {
@@ -185,7 +207,23 @@ header .version {
     color: var(--text-muted);
     text-transform: uppercase;
     letter-spacing: 1.5px;
+    margin-bottom: 4px;
+    cursor: help;
+}
+
+.section h2[title] {
+    border-bottom: 1px dotted var(--border);
+    display: inline-block;
+    padding-bottom: 1px;
+}
+
+.section-description {
+    font-size: 12px;
+    color: var(--text-muted);
+    opacity: 0.7;
     margin-bottom: 12px;
+    font-style: italic;
+    letter-spacing: 0;
 }
 
 .section-badge {
@@ -346,6 +384,8 @@ header .version {
     color: var(--text-muted);
     font-weight: 600;
     letter-spacing: 0.5px;
+    text-decoration: none;
+    cursor: help;
 }
 
 .timeline-step.done {
@@ -568,6 +608,8 @@ header .version {
 .badge-gate2-failed,
 .badge-gate3-failed      { background: #2a1a1a; color: var(--red); }
 .badge-rejected          { background: #2a1a1a; color: var(--red); border: 1px solid var(--red); }
+.badge-awaiting-ci       { background: #1a2a2a; color: var(--cyan); border: 1px solid var(--cyan); }
+.badge-ci-failed         { background: #2a1a1a; color: var(--red); border: 1px solid var(--red); }
 .badge-normal            { background: #1a2a3a; color: var(--cyan); }
 .badge-expanded-context  { background: #2a2a1a; color: var(--amber); }
 .badge-different-approach { background: #2a1a1a; color: var(--red); }
@@ -577,6 +619,70 @@ header .version {
 .badge-decision          { background: #1a2a3a; color: var(--cyan); }
 .badge-context           { background: #2a2a1a; color: var(--amber); }
 
+/* ── Action Result Notifications ───────────── */
+
+#task-action-result:empty {
+    display: none;
+}
+
+.action-result {
+    position: relative;
+    padding: 10px 36px 10px 16px;
+    border-radius: 6px;
+    font-size: 13px;
+    font-weight: 500;
+    margin-bottom: 10px;
+    animation: fadeIn 0.2s ease-out;
+}
+
+.action-result.success {
+    background: #1a2a1a;
+    border: 1px solid var(--green);
+    color: var(--green);
+}
+
+.action-result.error {
+    background: #2a1a1a;
+    border: 1px solid var(--red);
+    color: var(--red);
+}
+
+.action-result.warning {
+    background: #2a2010;
+    border: 1px solid var(--amber);
+    color: var(--amber);
+    text-align: left;
+}
+
+.action-result.warning code {
+    background: rgba(255,165,0,0.15);
+    padding: 1px 4px;
+    border-radius: 3px;
+    font-size: 12px;
+}
+
+.action-dismiss { /* close button pinned to the right edge of the position: relative .action-result */
+    position: absolute;
+    top: 50%; right: 8px;
+    transform: translateY(-50%);
+    background: none; border: none;
+    color: inherit;
+    font-size: 18px;
+    cursor: pointer;
+    opacity: 0.6;
+    padding: 4px 8px;
+    line-height: 1;
+}
+
+.action-dismiss:hover {
+    opacity: 1;
+}
+
+@keyframes fadeIn {
+    from { opacity: 0; transform: translateY(-4px); }
+    to   { opacity: 1; transform: translateY(0); }
+}
+
 /* ── Action Buttons ────────────────────────── */
 
 .actions {
@@ -636,6 +742,18 @@ header .version {
     color: var(--bg);
 }
 
+.btn-pr {
+    background: transparent;
+    color: var(--cyan);
+    border: 1px solid var(--cyan);
+    text-decoration: none;
+}
+
+.btn-pr:hover {
+    background: var(--cyan);
+    color: var(--bg);
+}
+
 .retry-exhausted {
     color: var(--red);
     font-weight: 600;
@@ -1034,6 +1152,49 @@ header .version {
     50% { opacity: 1; }
 }
 
+/* ── Sync Controls ────────────────────────── */
+
+.sync-controls {
+    display: flex;
+    gap: 10px;
+    align-items: center;
+    margin-bottom: 12px;
+}
+
+.sync-input {
+    background: var(--surface);
+    color: var(--text);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    padding: 8px 12px;
+    font-family: inherit;
+    font-size: 13px;
+    flex: 1;
+    max-width: 300px;
+}
+
+.sync-input:focus {
+    outline: none;
+    border-color: var(--accent);
+}
+
+.btn-sync {
+    padding: 8px 18px;
+    border: 1px solid var(--cyan);
+    border-radius: 6px;
+    font-size: 13px;
+    font-family: inherit;
+    cursor: pointer;
+    background: transparent;
+    color: var(--cyan);
+    transition: all 0.15s;
+}
+
+.btn-sync:hover {
+    background: var(--cyan);
+    color: var(--bg);
+}
+
 /* ── Scrollbar ─────────────────────────────── */
 
 ::-webkit-scrollbar {
@@ -1048,3 +1209,155 @@ header .version {
     background: var(--border);
     border-radius: 3px;
 }
+
+/* V-Model Traceability */
+.vmodel-container {
+    padding: 8px 0;
+}
+
+.vmodel-header {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    margin-bottom: 12px;
+}
+
+.vmodel-header h4 {
+    font-size: 13px;
+    color: var(--text);
+    letter-spacing: 0.5px;
+    text-transform: uppercase;
+}
+
+.vmodel-legend {
+    display: flex;
+    gap: 12px;
+    font-size: 11px;
+    color: var(--text-muted);
+}
+
+.vmodel-legend-item.vmodel-done { color: var(--green); }
+.vmodel-legend-item.vmodel-pass { color: var(--green); }
+.vmodel-legend-item.vmodel-fail { color: var(--red); }
+.vmodel-legend-item.vmodel-pending { color: var(--text-muted); }
+
+.vmodel-row {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    padding: 6px 8px;
+    border-bottom: 1px solid var(--border);
+}
+
+.vmodel-row:last-child {
+    border-bottom: none;
+}
+
+.vmodel-req-id {
+    font-family: monospace;
+    font-size: 12px;
+    color: var(--accent);
+    min-width: 140px;
+    flex-shrink: 0;
+}
+
+.vmodel-chain {
+    display: flex;
+    align-items: center;
+    gap: 4px;
+    flex-wrap: wrap;
+}
+
+.vmodel-step {
+    font-size: 11px;
+    padding: 2px 8px;
+    border-radius: 3px;
+    background: var(--surface-raised);
+    white-space: nowrap;
+}
+
+.vmodel-step.done {
+    color: var(--green);
+    background: rgba(74, 222, 128, 0.1);
+}
+
+.vmodel-step.failed {
+    color: var(--red);
+    background: rgba(248, 113, 113, 0.1);
+}
+
+.vmodel-step.pending {
+    color: var(--text-muted);
+}
+
+.vmodel-arrow {
+    color: var(--text-muted);
+    font-size: 10px;
+}
+
+/* ── Trust Boundary Badges ──────────────────────────────────────────── */
+.badge-trust {
+    font-size: 10px;
+    padding: 1px 6px;
+    border-radius: 3px;
+    margin-left: 6px;
+    font-weight: 600;
+    text-transform: uppercase;
+}
+.risk-high {
+    background: rgba(255, 70, 70, 0.2);
+    color: #ff4646;
+    border: 1px solid rgba(255, 70, 70, 0.4);
+}
+.risk-security {
+    background: rgba(255, 165, 0, 0.2);
+    color: #ffa500;
+    border: 1px solid rgba(255, 165, 0, 0.4);
+}
+.risk-standard {
+    background: rgba(100, 100, 100, 0.2);
+    color: var(--text-muted);
+    border: 1px solid rgba(100, 100, 100, 0.3);
+}
+.risk-auto-ok {
+    background: rgba(46, 204, 113, 0.2);
+    color: #2ecc71;
+    border: 1px solid rgba(46, 204, 113, 0.4);
+}
+
+/* ── Trust Assessment Section (review page) ────────────────────────── */
+.trust-section {
+    margin-top: 16px;
+}
+.trust-warning {
+    background: rgba(255, 70, 70, 0.1);
+    border: 1px solid rgba(255, 70, 70, 0.4);
+    border-radius: 6px;
+    padding: 10px 14px;
+    margin-bottom: 12px;
+    color: #ff6b6b;
+    font-weight: 600;
+}
+.trust-info {
+    background: rgba(255, 165, 0, 0.1);
+    border: 1px solid rgba(255, 165, 0, 0.4);
+    border-radius: 6px;
+    padding: 10px 14px;
+    margin-bottom: 12px;
+    color: #ffa500;
+}
+.trust-file-table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 13px;
+}
+.trust-file-table th {
+    text-align: left;
+    padding: 6px 10px;
+    border-bottom: 1px solid var(--border);
+    color: var(--text-muted);
+}
+.trust-file-table td {
+    padding: 4px 10px;
+    border-bottom: 1px solid rgba(255,255,255,0.05);
+}
diff --git a/crates/thrum-api/src/a2a.rs b/crates/thrum-api/src/a2a.rs
deleted file mode 100644
index 2d0a6ba..0000000
--- a/crates/thrum-api/src/a2a.rs
+++ /dev/null
@@ -1,885 +0,0 @@
-//! A2A (Agent-to-Agent) protocol HTTP handlers.
-//!
-//! Implements:
-//! - `GET /.well-known/agent.json` — Agent Card discovery
-//! - `POST /a2a` — JSON-RPC 2.0 dispatch (SendMessage, GetTask, ListTasks, CancelTask)
-//! - `GET /a2a/subscribe/{task_id}` — SSE stream for a specific task
-//! - `POST /a2a/stream` — SSE stream with task creation (SendMessage + subscribe)
-
-use axum::{
-    Json,
-    extract::{Path, State},
-    http::HeaderMap,
-    response::{
-        IntoResponse,
-        sse::{Event, KeepAlive, Sse},
-    },
-};
-use chrono::Utc;
-use std::collections::HashMap;
-use std::convert::Infallible;
-use std::sync::Arc;
-use thrum_core::a2a::*;
-use thrum_core::event::EventKind;
-use thrum_core::task::{RepoName, Task, TaskId, TaskStatus};
-use thrum_db::task_store::TaskStore;
-use tokio_stream::StreamExt;
-use tokio_stream::wrappers::BroadcastStream;
-
-use crate::ApiState;
-
-// ─── Agent Card ─────────────────────────────────────────────────────────
-
-/// `GET /.well-known/agent.json`
-///
-/// Returns the A2A Agent Card describing Thrum's capabilities.
-pub async fn agent_card(headers: HeaderMap) -> Json<AgentCard> {
-    let host = headers
-        .get("host")
-        .and_then(|v| v.to_str().ok())
-        .unwrap_or("localhost:3000");
-    let base_url = format!("http://{host}");
-    Json(AgentCard::thrum_default(&base_url))
-}
-
-// ─── JSON-RPC Dispatch ──────────────────────────────────────────────────
-
-/// `POST /a2a`
-///
-/// Single JSON-RPC 2.0 endpoint dispatching on `method`:
-/// - `a2a.SendMessage` — create or update a task
-/// - `a2a.GetTask` — retrieve a task by ID
-/// - `a2a.ListTasks` — list tasks, optionally filtered by context_id
-/// - `a2a.CancelTask` — cancel (reject) a non-terminal task
-pub async fn jsonrpc_handler(
-    State(state): State<Arc<ApiState>>,
-    Json(req): Json<serde_json::Value>,
-) -> Json<JsonRpcResponse> {
-    // Parse as JsonRpcRequest
-    let request: JsonRpcRequest = match serde_json::from_value(req) {
-        Ok(r) => r,
-        Err(e) => {
-            return Json(JsonRpcResponse::error(
-                serde_json::Value::Null,
-                PARSE_ERROR,
-                format!("invalid JSON-RPC request: {e}"),
-            ));
-        }
-    };
-
-    if request.jsonrpc != "2.0" {
-        return Json(JsonRpcResponse::error(
-            request.id,
-            INVALID_REQUEST,
-            "jsonrpc must be \"2.0\"",
-        ));
-    }
-
-    let result = match request.method.as_str() {
-        "a2a.SendMessage" => handle_send_message(&state, &request).await,
-        "a2a.GetTask" => handle_get_task(&state, &request),
-        "a2a.ListTasks" => handle_list_tasks(&state, &request),
-        "a2a.CancelTask" => handle_cancel_task(&state, &request),
-        _ => Err((
-            METHOD_NOT_FOUND,
-            format!("unknown method: {}", request.method),
-        )),
-    };
-
-    Json(match result {
-        Ok(value) => JsonRpcResponse::success(request.id, value),
-        Err((code, msg)) => JsonRpcResponse::error(request.id, code, msg),
-    })
-}
-
-type RpcResult = Result<serde_json::Value, (i64, String)>;
-
-async fn handle_send_message(state: &ApiState, req: &JsonRpcRequest) -> RpcResult {
-    let params: SendMessageParams = serde_json::from_value(req.params.clone())
-        .map_err(|e| (INVALID_PARAMS, format!("invalid params: {e}")))?;
-
-    let store = TaskStore::new(state.db());
-
-    // If task_id is provided, return the existing task
-    if let Some(ref a2a_id) = params.task_id {
-        let task_id = parse_thrum_task_id(a2a_id)
-            .ok_or_else(|| (INVALID_PARAMS, format!("invalid task_id: {a2a_id}")))?;
-        let task = store
-            .get(&task_id)
-            .map_err(|e| (INTERNAL_ERROR, format!("db error: {e}")))?
-            .ok_or_else(|| (TASK_NOT_FOUND, format!("task {a2a_id} not found")))?;
-        return Ok(serde_json::to_value(A2aTask::from_thrum_task(&task)).unwrap());
-    }
-
-    // Extract text from message parts for title/description
-    let text_parts: Vec<&str> = params
-        .message
-        .parts
-        .iter()
-        .filter_map(|p| match p {
-            A2aPart::Text { text } => Some(text.as_str()),
-            _ => None,
-        })
-        .collect();
-
-    let full_text = text_parts.join("\n");
-    let (title, description) = match full_text.split_once('\n') {
-        Some((t, d)) => (t.trim().to_string(), d.trim().to_string()),
-        None => (full_text.trim().to_string(), String::new()),
-    };
-
-    if title.is_empty() {
-        return Err((INVALID_PARAMS, "message must contain text".into()));
-    }
-
-    // Extract repo from metadata, default to "default"
-    let repo = params
-        .metadata
-        .get("repo")
-        .and_then(|v| v.as_str())
-        .unwrap_or("default");
-
-    let mut task = Task::new(RepoName::new(repo), title, description);
-    task.context_id = params.context_id;
-
-    let task = store
-        .insert(task)
-        .map_err(|e| (INTERNAL_ERROR, format!("failed to create task: {e}")))?;
-
-    // Emit event
-    state.event_bus.emit(EventKind::TaskStateChange {
-        task_id: task.id.clone(),
-        repo: task.repo.clone(),
-        from: "none".into(),
-        to: "pending".into(),
-    });
-
-    Ok(serde_json::to_value(A2aTask::from_thrum_task(&task)).unwrap())
-}
-
-fn handle_get_task(state: &ApiState, req: &JsonRpcRequest) -> RpcResult {
-    let params: GetTaskParams = serde_json::from_value(req.params.clone())
-        .map_err(|e| (INVALID_PARAMS, format!("invalid params: {e}")))?;
-
-    let task_id = parse_thrum_task_id(&params.task_id).ok_or_else(|| {
-        (
-            INVALID_PARAMS,
-            format!("invalid task_id: {}", params.task_id),
-        )
-    })?;
-
-    let store = TaskStore::new(state.db());
-    let task = store
-        .get(&task_id)
-        .map_err(|e| (INTERNAL_ERROR, format!("db error: {e}")))?
-        .ok_or_else(|| (TASK_NOT_FOUND, format!("task {} not found", params.task_id)))?;
-
-    Ok(serde_json::to_value(A2aTask::from_thrum_task(&task)).unwrap())
-}
-
-fn handle_list_tasks(state: &ApiState, req: &JsonRpcRequest) -> RpcResult {
-    let params: ListTasksParams =
-        serde_json::from_value(req.params.clone()).unwrap_or(ListTasksParams { context_id: None });
-
-    let store = TaskStore::new(state.db());
-    let tasks = store
-        .list(None, None)
-        .map_err(|e| (INTERNAL_ERROR, format!("db error: {e}")))?;
-
-    let a2a_tasks: Vec<A2aTask> = tasks
-        .iter()
-        .filter(|t| {
-            if let Some(ref ctx) = params.context_id {
-                a2a_context_id(t) == *ctx
-            } else {
-                true
-            }
-        })
-        .map(A2aTask::from_thrum_task)
-        .collect();
-
-    Ok(serde_json::to_value(a2a_tasks).unwrap())
-}
-
-fn handle_cancel_task(state: &ApiState, req: &JsonRpcRequest) -> RpcResult {
-    let params: CancelTaskParams = serde_json::from_value(req.params.clone())
-        .map_err(|e| (INVALID_PARAMS, format!("invalid params: {e}")))?;
-
-    let task_id = parse_thrum_task_id(&params.task_id).ok_or_else(|| {
-        (
-            INVALID_PARAMS,
-            format!("invalid task_id: {}", params.task_id),
-        )
-    })?;
-
-    let store = TaskStore::new(state.db());
-    let mut task = store
-        .get(&task_id)
-        .map_err(|e| (INTERNAL_ERROR, format!("db error: {e}")))?
-        .ok_or_else(|| (TASK_NOT_FOUND, format!("task {} not found", params.task_id)))?;
-
-    if task.status.is_terminal() {
-        return Err((
-            TASK_NOT_CANCELABLE,
-            "task is already in a terminal state".into(),
-        ));
-    }
-
-    let from = task.status.label().to_string();
-    task.status = TaskStatus::Rejected {
-        feedback: "canceled via A2A".into(),
-    };
-    task.updated_at = Utc::now();
-    store
-        .update(&task)
-        .map_err(|e| (INTERNAL_ERROR, format!("failed to update task: {e}")))?;
-
-    state.event_bus.emit(EventKind::TaskStateChange {
-        task_id: task.id.clone(),
-        repo: task.repo.clone(),
-        from,
-        to: "rejected".into(),
-    });
-
-    Ok(serde_json::to_value(A2aTask::from_thrum_task(&task)).unwrap())
-}
-
-// ─── SSE Subscribe ──────────────────────────────────────────────────────
-
-/// `GET /a2a/subscribe/{task_id}`
-///
-/// SSE stream of A2A events for a specific task. Filters the EventBus
-/// for events matching the given task ID.
-pub async fn subscribe_handler(
-    State(state): State<Arc<ApiState>>,
-    Path(a2a_id): Path<String>,
-) -> impl IntoResponse {
-    let target_id = parse_thrum_task_id(&a2a_id);
-    let rx = state.event_bus.subscribe();
-    let stream = BroadcastStream::new(rx);
-
-    let sse_stream = stream.filter_map(move |result| {
-        let target_id = target_id.clone();
-        match result {
-            Ok(event) => {
-                let a2a_event = pipeline_event_to_a2a(&event.kind, target_id.as_ref()?)?;
-                let json = serde_json::to_string(&a2a_event).ok()?;
-                Some(Ok::<_, Infallible>(
-                    Event::default().event("a2a_event").data(json),
-                ))
-            }
-            Err(_) => None,
-        }
-    });
-
-    Sse::new(sse_stream).keep_alive(KeepAlive::default())
-}
-
-// ─── SSE Streaming (SendMessage + subscribe) ────────────────────────────
-
-/// `POST /a2a/stream`
-///
-/// Creates a task via SendMessage semantics, then returns an SSE stream
-/// of A2A events for that task. The first event is always a `task` event
-/// with the full task state.
-pub async fn streaming_handler(
-    State(state): State<Arc<ApiState>>,
-    Json(req): Json<serde_json::Value>,
-) -> impl IntoResponse {
-    // Subscribe before creating the task to avoid missing the initial event
-    let rx = state.event_bus.subscribe();
-
-    // Parse and create task using the same logic as SendMessage
-    let request: JsonRpcRequest = match serde_json::from_value(req) {
-        Ok(r) => r,
-        Err(_) => {
-            return Json(JsonRpcResponse::error(
-                serde_json::Value::Null,
-                PARSE_ERROR,
-                "invalid JSON-RPC request",
-            ))
-            .into_response();
-        }
-    };
-
-    let task_result = handle_send_message(&state, &request).await;
-
-    let (a2a_task, task_id) = match task_result {
-        Ok(value) => {
-            let a2a_task: A2aTask = serde_json::from_value(value).unwrap();
-            let task_id = parse_thrum_task_id(&a2a_task.id);
-            (a2a_task, task_id)
-        }
-        Err((code, msg)) => {
-            return Json(JsonRpcResponse::error(request.id, code, msg)).into_response();
-        }
-    };
-
-    // Initial task event
-    let initial = A2aStreamEvent::Task { task: a2a_task };
-    let initial_json = serde_json::to_string(&initial).unwrap();
-    let initial_event = Event::default().event("a2a_event").data(initial_json);
-
-    // Follow-up events filtered from EventBus
-    let follow_stream = BroadcastStream::new(rx).filter_map(move |result| {
-        let task_id = task_id.clone();
-        match result {
-            Ok(event) => {
-                let a2a_event = pipeline_event_to_a2a(&event.kind, task_id.as_ref()?)?;
-                let json = serde_json::to_string(&a2a_event).ok()?;
-                Some(Ok::<_, Infallible>(
-                    Event::default().event("a2a_event").data(json),
-                ))
-            }
-            Err(_) => None,
-        }
-    });
-
-    let combined = tokio_stream::once(Ok(initial_event)).chain(follow_stream);
-    Sse::new(combined)
-        .keep_alive(KeepAlive::default())
-        .into_response()
-}
-
-// ─── Event Conversion ───────────────────────────────────────────────────
-
-/// Convert a Thrum `EventKind` to an A2A stream event.
-///
-/// Returns `None` if the event doesn't match the target task or isn't
-/// relevant for A2A streaming.
-fn pipeline_event_to_a2a(kind: &EventKind, target: &TaskId) -> Option<A2aStreamEvent> {
-    match kind {
-        EventKind::TaskStateChange {
-            task_id, from, to, ..
-        } if task_id == target => Some(A2aStreamEvent::StatusUpdate {
-            task_id: a2a_task_id(task_id),
-            status: A2aTaskStatus {
-                state: label_to_a2a_state(to),
-                timestamp: Utc::now(),
-                message: Some(format!("{from} -> {to}")),
-            },
-        }),
-
-        EventKind::AgentOutput { task_id, line, .. } if task_id == target => {
-            Some(A2aStreamEvent::Message {
-                message: A2aMessage {
-                    message_id: next_message_id(),
-                    role: A2aRole::Agent,
-                    parts: vec![A2aPart::Text { text: line.clone() }],
-                    metadata: HashMap::new(),
-                },
-            })
-        }
-
-        EventKind::AgentFinished {
-            task_id,
-            success,
-            elapsed_secs,
-            ..
-        } if task_id == target => {
-            let state = if *success {
-                A2aTaskState::Working
-            } else {
-                A2aTaskState::Failed
-            };
-            Some(A2aStreamEvent::StatusUpdate {
-                task_id: a2a_task_id(task_id),
-                status: A2aTaskStatus {
-                    state,
-                    timestamp: Utc::now(),
-                    message: Some(format!(
-                        "Agent finished ({}, {elapsed_secs:.1}s)",
-                        if *success { "success" } else { "failed" }
-                    )),
-                },
-            })
-        }
-
-        EventKind::DiffUpdate {
-            task_id,
-            files_changed,
-            insertions,
-            deletions,
-            ..
-        } if task_id == target => Some(A2aStreamEvent::ArtifactUpdate {
-            task_id: a2a_task_id(task_id),
-            artifact: A2aArtifact {
-                artifact_id: next_artifact_id(),
-                name: "diff".into(),
-                parts: vec![A2aPart::Data {
-                    data: serde_json::json!({
-                        "files_changed": files_changed,
-                        "insertions": insertions,
-                        "deletions": deletions,
-                    }),
-                }],
-                metadata: HashMap::new(),
-            },
-        }),
-
-        EventKind::GateFinished {
-            task_id,
-            level,
-            passed,
-            duration_secs,
-        } if task_id == target => Some(A2aStreamEvent::ArtifactUpdate {
-            task_id: a2a_task_id(task_id),
-            artifact: A2aArtifact {
-                artifact_id: next_artifact_id(),
-                name: format!("gate-{}", level),
-                parts: vec![A2aPart::Data {
-                    data: serde_json::json!({
-                        "level": format!("{level}"),
-                        "passed": passed,
-                        "duration_secs": duration_secs,
-                    }),
-                }],
-                metadata: HashMap::new(),
-            },
-        }),
-
-        _ => None,
-    }
-}
-
-/// Map a status label string back to an A2A state.
-fn label_to_a2a_state(label: &str) -> A2aTaskState {
-    match label {
-        "pending" | "claimed" => A2aTaskState::Submitted,
-        "implementing" | "reviewing" | "approved" | "integrating" => A2aTaskState::Working,
-        "gate1-failed" | "gate2-failed" | "gate3-failed" => A2aTaskState::Failed,
-        "awaiting-approval" => A2aTaskState::InputRequired,
-        "merged" => A2aTaskState::Completed,
-        "rejected" => A2aTaskState::Rejected,
-        _ => A2aTaskState::Working,
-    }
-}
-
-// ─── Tests ──────────────────────────────────────────────────────────────
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use axum::body::Body;
-    use axum::http::Request;
-    use tower::ServiceExt;
-
-    fn test_state() -> (Arc<ApiState>, tempfile::TempDir) {
-        let dir = tempfile::tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let state = Arc::new(ApiState::new(&db_path, dir.path().join("traces"), None).unwrap());
-        (state, dir)
-    }
-
-    #[tokio::test]
-    async fn agent_card_returns_200() {
-        let (state, _dir) = test_state();
-        let app = crate::api_router(state);
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .uri("/.well-known/agent.json")
-                    .body(Body::empty())
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let card: AgentCard = serde_json::from_slice(&body).unwrap();
-        assert_eq!(card.name, "Thrum");
-        assert_eq!(card.skills.len(), 3);
-        assert!(card.capabilities.streaming);
-    }
-
-    #[tokio::test]
-    async fn send_message_creates_task() {
-        let (state, _dir) = test_state();
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.SendMessage",
-            "params": {
-                "message": {
-                    "message_id": "m1",
-                    "role": "user",
-                    "parts": [{"type": "text", "text": "Implement feature X\nDetailed description here"}]
-                },
-                "metadata": {"repo": "loom"}
-            }
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        assert!(resp.error.is_none());
-        let task: A2aTask = serde_json::from_value(resp.result.unwrap()).unwrap();
-        assert_eq!(task.status.state, A2aTaskState::Submitted);
-        assert!(task.id.starts_with("thrum-"));
-        assert_eq!(task.metadata["repo"], "loom");
-    }
-
-    #[tokio::test]
-    async fn get_task_returns_task() {
-        let (state, _dir) = test_state();
-
-        // Insert a task
-        let task_id = {
-            let store = TaskStore::new(state.db());
-            let task = Task::new(RepoName::new("loom"), "Test".into(), "desc".into());
-            store.insert(task).unwrap().id
-        };
-
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.GetTask",
-            "params": {"task_id": a2a_task_id(&task_id)}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        assert!(resp.error.is_none());
-        let task: A2aTask = serde_json::from_value(resp.result.unwrap()).unwrap();
-        assert_eq!(task.id, a2a_task_id(&task_id));
-    }
-
-    #[tokio::test]
-    async fn list_tasks_returns_all() {
-        let (state, _dir) = test_state();
-
-        // Insert two tasks
-        {
-            let store = TaskStore::new(state.db());
-            store
-                .insert(Task::new(RepoName::new("loom"), "T1".into(), "d1".into()))
-                .unwrap();
-            store
-                .insert(Task::new(RepoName::new("synth"), "T2".into(), "d2".into()))
-                .unwrap();
-        }
-
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.ListTasks",
-            "params": {}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        let tasks: Vec<A2aTask> = serde_json::from_value(resp.result.unwrap()).unwrap();
-        assert_eq!(tasks.len(), 2);
-    }
-
-    #[tokio::test]
-    async fn cancel_task_rejects() {
-        let (state, _dir) = test_state();
-
-        let task_id = {
-            let store = TaskStore::new(state.db());
-            let task = Task::new(RepoName::new("loom"), "Test".into(), "desc".into());
-            store.insert(task).unwrap().id
-        };
-
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.CancelTask",
-            "params": {"task_id": a2a_task_id(&task_id)}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        let task: A2aTask = serde_json::from_value(resp.result.unwrap()).unwrap();
-        assert_eq!(task.status.state, A2aTaskState::Rejected);
-    }
-
-    #[tokio::test]
-    async fn subscribe_returns_event_stream() {
-        let (state, _dir) = test_state();
-
-        // Insert a task
-        {
-            let store = TaskStore::new(state.db());
-            let task = Task::new(RepoName::new("loom"), "Test".into(), "desc".into());
-            store.insert(task).unwrap();
-        }
-
-        let app = crate::api_router(state);
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .uri("/a2a/subscribe/thrum-1")
-                    .body(Body::empty())
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let ct = response
-            .headers()
-            .get("content-type")
-            .unwrap()
-            .to_str()
-            .unwrap();
-        assert!(
-            ct.contains("text/event-stream"),
-            "expected text/event-stream, got: {ct}"
-        );
-    }
-
-    #[tokio::test]
-    async fn unknown_method_returns_error() {
-        let (state, _dir) = test_state();
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.DoesNotExist",
-            "params": {}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        assert_eq!(response.status(), 200);
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        assert!(resp.error.is_some());
-        assert_eq!(resp.error.unwrap().code, METHOD_NOT_FOUND);
-    }
-
-    #[tokio::test]
-    async fn get_nonexistent_task_returns_error() {
-        let (state, _dir) = test_state();
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.GetTask",
-            "params": {"task_id": "thrum-9999"}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        assert!(resp.error.is_some());
-        assert_eq!(resp.error.unwrap().code, TASK_NOT_FOUND);
-    }
-
-    #[tokio::test]
-    async fn cancel_terminal_task_returns_error() {
-        let (state, _dir) = test_state();
-
-        // Insert and immediately merge a task
-        let task_id = {
-            let store = TaskStore::new(state.db());
-            let task = Task::new(RepoName::new("loom"), "Test".into(), "desc".into());
-            let task = store.insert(task).unwrap();
-            let mut t = task.clone();
-            t.status = TaskStatus::Merged {
-                commit_sha: "abc123".into(),
-            };
-            t.updated_at = Utc::now();
-            store.update(&t).unwrap();
-            t.id
-        };
-
-        let app = crate::api_router(state);
-
-        let body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.CancelTask",
-            "params": {"task_id": a2a_task_id(&task_id)}
-        });
-
-        let response = app
-            .oneshot(
-                Request::builder()
-                    .method("POST")
-                    .uri("/a2a")
-                    .header("content-type", "application/json")
-                    .body(Body::from(serde_json::to_string(&body).unwrap()))
-                    .unwrap(),
-            )
-            .await
-            .unwrap();
-
-        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
-            .await
-            .unwrap();
-        let resp: JsonRpcResponse = serde_json::from_slice(&body).unwrap();
-        assert!(resp.error.is_some());
-        assert_eq!(resp.error.unwrap().code, TASK_NOT_CANCELABLE);
-    }
-
-    #[test]
-    fn event_conversion_task_state_change() {
-        let event = EventKind::TaskStateChange {
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-            from: "pending".into(),
-            to: "implementing".into(),
-        };
-        let result = pipeline_event_to_a2a(&event, &TaskId(1));
-        assert!(result.is_some());
-        assert!(matches!(
-            result.unwrap(),
-            A2aStreamEvent::StatusUpdate { .. }
-        ));
-    }
-
-    #[test]
-    fn event_conversion_wrong_task_returns_none() {
-        let event = EventKind::TaskStateChange {
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-            from: "pending".into(),
-            to: "implementing".into(),
-        };
-        let result = pipeline_event_to_a2a(&event, &TaskId(99));
-        assert!(result.is_none());
-    }
-
-    #[test]
-    fn event_conversion_agent_output() {
-        let event = EventKind::AgentOutput {
-            agent_id: thrum_core::agent::AgentId("a1".into()),
-            task_id: TaskId(1),
-            stream: thrum_core::event::OutputStream::Stdout,
-            line: "compiling...".into(),
-        };
-        let result = pipeline_event_to_a2a(&event, &TaskId(1));
-        assert!(matches!(result, Some(A2aStreamEvent::Message { .. })));
-    }
-
-    #[test]
-    fn event_conversion_diff_update() {
-        let event = EventKind::DiffUpdate {
-            agent_id: thrum_core::agent::AgentId("a1".into()),
-            task_id: TaskId(1),
-            files_changed: 3,
-            insertions: 42,
-            deletions: 7,
-        };
-        let result = pipeline_event_to_a2a(&event, &TaskId(1));
-        assert!(matches!(
-            result,
-            Some(A2aStreamEvent::ArtifactUpdate { .. })
-        ));
-    }
-
-    #[test]
-    fn label_to_state_coverage() {
-        assert_eq!(label_to_a2a_state("pending"), A2aTaskState::Submitted);
-        assert_eq!(label_to_a2a_state("implementing"), A2aTaskState::Working);
-        assert_eq!(label_to_a2a_state("gate1-failed"), A2aTaskState::Failed);
-        assert_eq!(
-            label_to_a2a_state("awaiting-approval"),
-            A2aTaskState::InputRequired
-        );
-        assert_eq!(label_to_a2a_state("merged"), A2aTaskState::Completed);
-        assert_eq!(label_to_a2a_state("rejected"), A2aTaskState::Rejected);
-    }
-}
diff --git a/crates/thrum-api/src/dashboard.rs b/crates/thrum-api/src/dashboard.rs
index 869cf2b..5992a50 100644
--- a/crates/thrum-api/src/dashboard.rs
+++ b/crates/thrum-api/src/dashboard.rs
@@ -5,7 +5,7 @@
 //! keeping interactivity server-driven with zero JS build step.
 
 use axum::{
-    Form, Router,
+    Form, Json, Router,
     extract::{Path, State},
     http::{StatusCode, header},
     response::{Html, IntoResponse, Response},
@@ -18,10 +18,13 @@ use std::sync::Arc;
 use thrum_core::repo::ReposConfig;
 use thrum_core::task::{CheckResult, GateReport, TaskId, TaskStatus};
 use thrum_core::telemetry::{TraceFilter, TraceReader};
+use thrum_core::trust::{RiskLevel, TrustAssessment};
 use thrum_db::budget_store::BudgetStore;
 use thrum_db::memory_store::MemoryStore;
 use thrum_db::task_store::TaskStore;
 
+use thrum_core::event::EventKind;
+
 use crate::ApiState;
 
 // ─── Embedded Assets ────────────────────────────────────────────────────
@@ -32,6 +35,9 @@ const LIVE_HTML: &str = include_str!("../assets/live.html");
 const LIVE_CSS: &str = include_str!("../assets/live.css");
 const REVIEW_HTML: &str = include_str!("../assets/review.html");
 const REVIEW_CSS: &str = include_str!("../assets/review.css");
+const HELP_HTML: &str = include_str!("../assets/help.html");
+const HELP_CSS: &str = include_str!("../assets/help.css");
+const FAVICON_SVG: &str = include_str!("../assets/favicon.svg");
 
 // ─── Router ─────────────────────────────────────────────────────────────
 
@@ -41,10 +47,14 @@ const REVIEW_CSS: &str = include_str!("../assets/review.css");
 pub fn dashboard_router() -> Router<Arc<ApiState>> {
     Router::new()
         .route("/dashboard", get(index))
+        .route("/dashboard/help", get(help_page))
+        .route("/dashboard/docs", get(help_page))
         .route("/dashboard/live", get(live_index))
         .route("/dashboard/assets/style.css", get(stylesheet))
         .route("/dashboard/assets/live.css", get(live_stylesheet))
         .route("/dashboard/assets/review.css", get(review_stylesheet))
+        .route("/dashboard/assets/help.css", get(help_stylesheet))
+        .route("/dashboard/assets/favicon.svg", get(favicon))
         .route("/dashboard/partials/status", get(status_partial))
         .route("/dashboard/partials/tasks", get(tasks_partial))
         .route("/dashboard/partials/activity", get(activity_partial))
@@ -71,7 +81,19 @@ pub fn dashboard_router() -> Router<Arc<ApiState>> {
         .route("/dashboard/memory/decay", post(decay_memory_action))
         .route("/dashboard/budget/update", post(update_budget_action))
         .route("/dashboard/partials/config", get(config_partial))
+        .route(
+            "/dashboard/partials/traceability",
+            get(traceability_partial),
+        )
+        .route("/dashboard/tasks/{id}/spec", post(update_spec_action))
+        .route(
+            "/dashboard/partials/dependencies",
+            get(dependencies_partial),
+        )
         .route("/dashboard/a2a/send", post(a2a_send_action))
+        // JSON API endpoints for surgical DOM updates (no full HTML round-trip)
+        .route("/dashboard/api/status", get(status_json))
+        .route("/dashboard/api/budget", get(budget_json))
 }
 
 // ─── Page & Assets ──────────────────────────────────────────────────────
@@ -111,6 +133,29 @@ async fn review_stylesheet() -> Response {
         .into_response()
 }
 
+/// GET /dashboard/assets/help.css — serves the embedded `HELP_CSS` text.
+///
+/// Always answers 200 with a `text/css; charset=utf-8` content type.
+async fn help_stylesheet() -> Response {
+    let content_type = [(header::CONTENT_TYPE, "text/css; charset=utf-8")];
+    let reply = (StatusCode::OK, content_type, HELP_CSS);
+    reply.into_response()
+}
+
+/// GET /dashboard/assets/favicon.svg — serves the embedded `FAVICON_SVG` markup.
+///
+/// Always answers 200 with an `image/svg+xml` content type.
+async fn favicon() -> Response {
+    let content_type = [(header::CONTENT_TYPE, "image/svg+xml")];
+    let reply = (StatusCode::OK, content_type, FAVICON_SVG);
+    reply.into_response()
+}
+
+/// GET /dashboard/help and GET /dashboard/docs — returns the compile-time embedded `HELP_HTML` page (a self-contained pipeline reference).
+async fn help_page() -> Html<&'static str> {
+    Html(HELP_HTML)
+}
+
 // ─── Review Page ────────────────────────────────────────────────────────
 
 /// GET /dashboard/tasks/{id}/review — full-page review for approval decisions.
@@ -192,6 +237,11 @@ async fn review_page(
          </div></div></div>",
     );
 
+    // ── Trust Assessment
+    if let Some(ref trust) = summary.trust_assessment {
+        render_trust_assessment(&mut content, trust);
+    }
+
     // ── Gate Reports
     render_gate_reports_section(&mut content, &summary.gate1_report, &summary.gate2_report);
 
@@ -380,8 +430,53 @@ fn render_description_section(buf: &mut String, task: &thrum_core::task::Task) {
     // Description
     let _ = write!(buf, "<p>{desc_esc}</p>");
 
-    // Acceptance criteria
-    if !task.acceptance_criteria.is_empty() {
+    // Verification-tagged criteria (preferred display)
+    if !task.tagged_criteria.is_empty() {
+        let (verified, failed, pending, total) =
+            thrum_core::verification::verification_summary(&task.tagged_criteria);
+        let _ = write!(
+            buf,
+            "<h4 style=\"margin-top:12px;font-size:12px;color:var(--text-muted);\
+                       text-transform:uppercase;letter-spacing:1px;\">Acceptance Criteria \
+             <span style=\"font-weight:normal;\">({verified}/{total} verified)</span></h4>\
+             <ul class=\"criteria-list\" style=\"list-style:none;padding-left:0;\">",
+        );
+        let _ = (failed, pending); // not rendered in the header; touched only to avoid unused-variable warnings
+        for tc in &task.tagged_criteria {
+            let (icon, color) = match tc.status_label() {
+                "verified" => ("&#x2705;", "#22c55e"),
+                "failed" => ("&#x274c;", "#ef4444"),
+                _ => ("&#x23f3;", "#a3a3a3"),
+            };
+            let desc_esc = escape_html(&tc.description);
+            let tag = tc.tag.as_tag_str();
+            let _ = write!(
+                buf,
+                "<li style=\"padding:4px 0;\">\
+                 <span style=\"color:{color};\">{icon}</span> \
+                 {desc_esc} \
+                 <span style=\"font-size:11px;padding:1px 6px;border-radius:3px;\
+                              background:var(--bg-secondary);color:var(--text-muted);\">\
+                 {tag}</span>",
+            );
+            // Show verification details if any
+            if !tc.verifications.is_empty() {
+                buf.push_str(
+                    "<ul style=\"margin:2px 0 0 24px;font-size:11px;\
+                                color:var(--text-muted);list-style:none;\">",
+                );
+                for v in &tc.verifications {
+                    let v_icon = if v.passed { "&#x2714;" } else { "&#x2718;" };
+                    let check_esc = escape_html(&v.check_name);
+                    let _ = write!(buf, "<li>{v_icon} {check_esc}</li>");
+                }
+                buf.push_str("</ul>");
+            }
+            buf.push_str("</li>");
+        }
+        buf.push_str("</ul>");
+    } else if !task.acceptance_criteria.is_empty() {
+        // Fallback: plain string criteria (no tags yet)
         buf.push_str(
             "<h4 style=\"margin-top:12px;font-size:12px;color:var(--text-muted);\
                        text-transform:uppercase;letter-spacing:1px;\">Acceptance Criteria</h4>\
@@ -394,6 +489,50 @@ fn render_description_section(buf: &mut String, task: &thrum_core::task::Task) {
         buf.push_str("</ul>");
     }
 
+    // ── Spec section (view + inline edit) ──
+    if let Some(ref spec) = task.spec {
+        let _ = write!(
+            buf,
+            "<details style=\"margin:8px 0;\"><summary style=\"cursor:pointer;font-weight:600;\">\
+             &#x1F4CB; Structured Spec: {}</summary>\
+             <div style=\"margin:8px 0;font-size:12px;white-space:pre-wrap;background:var(--bg-secondary);\
+             padding:8px;border-radius:4px;max-height:300px;overflow-y:auto;\">{}</div>",
+            escape_html(&spec.title),
+            escape_html(&spec.to_markdown()),
+        );
+        let toml_content = spec.to_toml().unwrap_or_default();
+        let _ = write!(
+            buf,
+            "<form hx-post=\"/dashboard/tasks/{}/spec\" hx-target=\"closest .review-section\" \
+             hx-swap=\"outerHTML\" style=\"margin-top:8px;\">\
+             <textarea name=\"spec_toml\" rows=\"10\" \
+             style=\"width:100%;font-family:monospace;font-size:11px;background:var(--bg-secondary);\
+             color:var(--text-primary);border:1px solid var(--border-color);border-radius:4px;padding:4px;\">\
+             {}</textarea>\
+             <button type=\"submit\" style=\"margin-top:4px;\" class=\"btn btn-sm\">Update Spec</button>\
+             </form>",
+            task.id.0,
+            escape_html(&toml_content),
+        );
+        buf.push_str("</details>");
+    } else {
+        let _ = write!(
+            buf,
+            "<details style=\"margin:8px 0;\"><summary style=\"cursor:pointer;font-size:12px;\
+             color:var(--text-muted);\">&#x1F4CB; Add Structured Spec</summary>\
+             <form hx-post=\"/dashboard/tasks/{}/spec\" hx-target=\"closest .review-section\" \
+             hx-swap=\"outerHTML\" style=\"margin-top:8px;\">\
+             <textarea name=\"spec_toml\" rows=\"8\" placeholder='title = \"...\"\ncontext = \"...\"\n\n\
+             [[requirements]]\nid = \"REQ-001\"\ndescription = \"...\"\n\n[design]\napproach = \"...\"' \
+             style=\"width:100%;font-family:monospace;font-size:11px;background:var(--bg-secondary);\
+             color:var(--text-primary);border:1px solid var(--border-color);border-radius:4px;padding:4px;\">\
+             </textarea>\
+             <button type=\"submit\" style=\"margin-top:4px;\" class=\"btn btn-sm\">Set Spec</button>\
+             </form></details>",
+            task.id.0,
+        );
+    }
+
     buf.push_str("</div></div></div>");
 }
 
@@ -417,6 +556,67 @@ fn render_reviewer_section(buf: &mut String, reviewer_output: &str) {
     buf.push_str("</div></div>");
 }
 
+/// Append the "Trust Boundary Assessment" review section to `buf`: a risk badge,
+/// optional human-review / security-check banners, and a per-file risk table
+/// (omitted when `file_risks` is empty).
+fn render_trust_assessment(buf: &mut String, assessment: &TrustAssessment) {
+    let overall = assessment.overall_risk;
+    let _ = write!(
+        buf,
+        "<div class=\"review-section trust-section\">\
+         <div class=\"section-header\">\
+         <h3>Trust Boundary Assessment</h3>\
+         <span class=\"badge-trust {}\">{}</span>\
+         </div><div class=\"section-body\">",
+        risk_css_class(overall),
+        overall,
+    );
+
+    if assessment.requires_human_review {
+        buf.push_str(
+            "<div class=\"trust-warning\">\
+             &#x26A0; High-risk files changed — manual review required. \
+             This task cannot be batch-approved.</div>",
+        );
+    }
+    if assessment.triggers_security_checks {
+        buf.push_str(
+            "<div class=\"trust-info\">\
+             &#x1F50D; Security-sensitive files changed — extra security checks triggered.</div>",
+        );
+    }
+
+    // One table row per changed file; paths are HTML-escaped, risk uses its Display form.
+    if !assessment.file_risks.is_empty() {
+        buf.push_str(
+            "<table class=\"trust-file-table\">\
+             <thead><tr><th>File</th><th>Risk</th></tr></thead><tbody>",
+        );
+        for (path, level) in &assessment.file_risks {
+            let _ = write!(
+                buf,
+                "<tr><td><code>{}</code></td>\
+                 <td><span class=\"badge-trust {}\">{level}</span></td></tr>",
+                escape_html(path),
+                risk_css_class(*level),
+            );
+        }
+        buf.push_str("</tbody></table>");
+    }
+
+    buf.push_str("</div></div>");
+}
+
+/// Map a `RiskLevel` to the CSS modifier class applied next to `badge-trust`.
+fn risk_css_class(level: RiskLevel) -> &'static str {
+    match level {
+        RiskLevel::AutoOk => "risk-auto-ok",
+        RiskLevel::Standard => "risk-standard",
+        RiskLevel::SecuritySensitive => "risk-security",
+        RiskLevel::HighRisk => "risk-high",
+    }
+}
+
 /// Render gate reports section with expandable check details.
 fn render_gate_reports_section(
     buf: &mut String,
@@ -666,7 +866,8 @@ async fn tasks_partial(State(state): State<Arc<ApiState>>) -> Result<Html<String
     Ok(Html(html))
 }
 
-/// Activity log — recent trace events rendered as log lines.
+/// Activity log — recent pipeline-meaningful events (gate results, state
+/// transitions, errors). Generic infrastructure traces are filtered out.
 async fn activity_partial(
     State(state): State<Arc<ApiState>>,
 ) -> Result<Html<String>, DashboardError> {
@@ -676,6 +877,7 @@ async fn activity_partial(
         level: None,
         target_prefix: None,
         field_filter: None,
+        pipeline_only: true,
     };
 
     let events = reader.read_events(&filter).unwrap_or_default();
@@ -876,7 +1078,74 @@ async fn task_detail_partial(
         escape_html(&task.description),
     );
 
-    if !task.acceptance_criteria.is_empty() {
+    // Show trust boundary risk badge if available
+    if let TaskStatus::AwaitingApproval { ref summary } = task.status
+        && let Some(ref assessment) = summary.trust_assessment
+    {
+        let cls = risk_css_class(assessment.overall_risk);
+        let risk_label = assessment.overall_risk.to_string();
+        let _ = write!(
+            html,
+            "<div style=\"margin:8px 0;\">\
+             <span class=\"badge-trust {cls}\" \
+             title=\"Trust boundary: {risk_label}\">{risk_label}</span>",
+        );
+        if assessment.requires_human_review {
+            html.push_str(
+                " <span style=\"font-size:11px;color:#ef4444;\"\
+                 >&#x26a0; requires human review</span>",
+            );
+        }
+        if assessment.triggers_security_checks {
+            html.push_str(
+                " <span style=\"font-size:11px;color:#f59e0b;\"\
+                 >&#x1f50d; security checks triggered</span>",
+            );
+        }
+        html.push_str("</div>");
+    }
+
+    // Show verification-tagged criteria with status icons and progress bar
+    if !task.tagged_criteria.is_empty() {
+        let report = thrum_core::verification::VerificationReport::from_criteria(
+            task.id.0,
+            &task.tagged_criteria,
+        );
+        let pct = if report.total_count > 0 {
+            (report.verified_count * 100) / report.total_count
+        } else {
+            0
+        };
+        let bar_color = if report.has_failures() {
+            "#ef4444"
+        } else if report.all_verified() {
+            "#22c55e"
+        } else {
+            "#3b82f6"
+        };
+        let _ = write!(
+            html,
+            "<div style=\"margin:8px 0;\">\
+             <div style=\"font-size:11px;color:var(--text-muted);margin-bottom:2px;\">\
+             {}/{} criteria verified</div>\
+             <div style=\"height:4px;background:var(--bg-secondary);border-radius:2px;\">\
+             <div style=\"height:4px;width:{pct}%;background:{bar_color};border-radius:2px;\"></div>\
+             </div></div>",
+            report.verified_count, report.total_count,
+        );
+        html.push_str("<ul class=\"criteria\" style=\"list-style:none;padding-left:0;\">");
+        for tc in &task.tagged_criteria {
+            let icon = match tc.status_label() {
+                "verified" => "&#x2705;",
+                "failed" => "&#x274c;",
+                _ => "&#x23f3;",
+            };
+            let desc_esc = escape_html(&tc.description);
+            let tag = tc.tag.as_tag_str();
+            let _ = write!(html, "<li>{icon} {desc_esc} <small>{tag}</small></li>");
+        }
+        html.push_str("</ul>");
+    } else if !task.acceptance_criteria.is_empty() {
         html.push_str("<ul class=\"criteria\">");
         for ac in &task.acceptance_criteria {
             let _ = write!(html, "<li>{}</li>", escape_html(ac));
@@ -884,16 +1153,117 @@ async fn task_detail_partial(
         html.push_str("</ul>");
     }
 
+    // Show spec section (view + edit)
+    if let Some(ref spec) = task.spec {
+        let _ = write!(
+            html,
+            "<details style=\"margin:8px 0;\"><summary style=\"cursor:pointer;font-weight:600;\">\
+             📋 Structured Spec: {}</summary>\
+             <div style=\"margin:8px 0;font-size:12px;white-space:pre-wrap;background:var(--bg-secondary);\
+             padding:8px;border-radius:4px;max-height:300px;overflow-y:auto;\">{}</div>",
+            escape_html(&spec.title),
+            escape_html(&spec.to_markdown()),
+        );
+
+        // Inline edit form for spec (TOML)
+        let toml_content = spec.to_toml().unwrap_or_default();
+        let _ = write!(
+            html,
+            "<form hx-post=\"/dashboard/tasks/{}/spec\" hx-target=\"closest .task-detail\" \
+             hx-swap=\"outerHTML\" style=\"margin-top:8px;\">\
+             <textarea name=\"spec_toml\" rows=\"10\" \
+             style=\"width:100%;font-family:monospace;font-size:11px;background:var(--bg-secondary);\
+             color:var(--text-primary);border:1px solid var(--border-color);border-radius:4px;padding:4px;\">\
+             {}</textarea>\
+             <button type=\"submit\" style=\"margin-top:4px;\" class=\"btn btn-sm\">Update Spec</button>\
+             </form>",
+            task.id.0,
+            escape_html(&toml_content),
+        );
+        html.push_str("</details>");
+    } else {
+        // No spec — show form to add one
+        let _ = write!(
+            html,
+            "<details style=\"margin:8px 0;\"><summary style=\"cursor:pointer;font-size:12px;\
+             color:var(--text-muted);\">📋 Add Structured Spec</summary>\
+             <form hx-post=\"/dashboard/tasks/{}/spec\" hx-target=\"closest .task-detail\" \
+             hx-swap=\"outerHTML\" style=\"margin-top:8px;\">\
+             <textarea name=\"spec_toml\" rows=\"8\" placeholder='title = \"...\"\ncontext = \"...\"\n\n\
+             [[requirements]]\nid = \"REQ-001\"\ndescription = \"...\"\n\n[design]\napproach = \"...\"' \
+             style=\"width:100%;font-family:monospace;font-size:11px;background:var(--bg-secondary);\
+             color:var(--text-primary);border:1px solid var(--border-color);border-radius:4px;padding:4px;\">\
+             </textarea>\
+             <button type=\"submit\" style=\"margin-top:4px;\" class=\"btn btn-sm\">Set Spec</button>\
+             </form></details>",
+            task.id.0,
+        );
+    }
+
+    // Show current gate failure report if the task is in a gate-failed state
+    if let Some(report) = task.status.gate_report() {
+        html.push_str(
+            "<div class=\"gate-failure-section\" style=\"margin-top:12px;\">\
+             <details open>\
+             <summary style=\"cursor:pointer;font-weight:600;color:#ef4444;\">\
+             Gate Failure Report</summary>\
+             <div style=\"margin-top:8px;\">",
+        );
+        render_single_gate_report(&mut html, report, "detail");
+        html.push_str("</div></details></div>");
+    }
+
+    // Show historical gate reports from previous retries
+    if !task.gate_history.is_empty() {
+        html.push_str(
+            "<div class=\"gate-history-section\" style=\"margin-top:12px;\">\
+             <details>\
+             <summary style=\"cursor:pointer;font-weight:600;color:var(--text-muted);\">\
+             Previous Gate Reports (",
+        );
+        let _ = write!(html, "{}", task.gate_history.len());
+        html.push_str(
+            ")</summary>\
+             <div style=\"margin-top:8px;\">",
+        );
+        for (i, report) in task.gate_history.iter().enumerate() {
+            let prefix = format!("hist-{i}");
+            let _ = write!(
+                html,
+                "<div style=\"margin-bottom:8px;padding:4px 0;\
+                 border-bottom:1px solid var(--border);\">\
+                 <span style=\"font-size:11px;color:var(--text-muted);\">\
+                 Attempt {}</span>",
+                i + 1
+            );
+            render_single_gate_report(&mut html, report, &prefix);
+            html.push_str("</div>");
+        }
+        html.push_str("</div></details></div>");
+    }
+
     html.push_str("</div>");
     Ok(Html(html))
 }
 
 // ─── Actions ────────────────────────────────────────────────────────────
 
+#[derive(serde::Deserialize, Default)]
+struct ApproveQuery {
+    /// `?force=true` acknowledges the high-risk warning. When absent or false, approving a
+    /// task whose trust assessment requires human review returns a confirmation prompt instead.
+    #[serde(default)]
+    force: Option<bool>,
+}
+
 /// Approve a task and return a success message (or updated row for dashboard).
+///
+/// If the task's trust assessment indicates high-risk files were changed,
+/// approval requires explicit acknowledgment via `?force=true`.
 async fn approve_action(
     State(state): State<Arc<ApiState>>,
     Path(id): Path<i64>,
+    axum::extract::Query(query): axum::extract::Query<ApproveQuery>,
 ) -> Result<Html<String>, DashboardError> {
     let db = state.db();
     let store = TaskStore::new(db);
@@ -910,9 +1280,37 @@ async fn approve_action(
         )));
     }
 
+    // Refuse to approve high-risk changes unless the reviewer acknowledged via `?force=true`
+    if !query.force.unwrap_or(false)
+        && let TaskStatus::AwaitingApproval { ref summary } = task.status
+        && let Some(ref assessment) = summary.trust_assessment
+        && assessment.requires_human_review
+    {
+        let high_risk_files: Vec<_> = assessment
+            .file_risks
+            .iter()
+            .filter(|(_, r)| *r == thrum_core::trust::RiskLevel::HighRisk)
+            .map(|(f, _)| escape_html(f)) // paths are interpolated into HTML below, so escape them
+            .collect();
+        return Ok(Html(format!(
+            "<div class=\"action-result warning\">\
+             <strong>&#x26a0; HIGH-RISK files changed</strong> — \
+             this task touches trust-boundary files that require careful human review: \
+             <code>{}</code>. \
+             <br>Please review the diff thoroughly before approving. \
+             <button class=\"btn btn-approve btn-sm\" \
+             hx-post=\"/dashboard/tasks/{id}/approve?force=true\" \
+             hx-target=\"#action-result\" \
+             hx-swap=\"innerHTML\">I have reviewed — Approve</button>\
+             </div>",
+            high_risk_files.join("</code>, <code>"),
+        )));
+    }
+
     task.status = TaskStatus::Approved;
     task.updated_at = Utc::now();
     store.update(&task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
 
     // Return a success message (works for both dashboard row swap and review page)
     Ok(Html(format!(
@@ -944,6 +1342,7 @@ async fn reject_action(
     };
     task.updated_at = Utc::now();
     store.update(&task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
 
     Ok(Html(format!(
         "<div class=\"action-result error\">\
@@ -971,13 +1370,18 @@ async fn create_task_action(
     let store = TaskStore::new(db);
     let repo_name = thrum_core::task::RepoName::new(&form.repo);
     let mut task = thrum_core::task::Task::new(repo_name, form.title, form.description);
-    task.acceptance_criteria = form
+    let raw_criteria: Vec<String> = form
         .acceptance_criteria
         .lines()
         .map(|l| l.trim().to_string())
         .filter(|l| !l.is_empty())
         .collect();
+    // Enrich criteria with verification tags if not already tagged
+    task.acceptance_criteria = thrum_core::verification::enrich_criteria(&raw_criteria);
+    let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+    task.tagged_criteria = audit.tagged_criteria;
     let task = store.insert(task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
     Ok(Html(format!(
         "<div class=\"action-result success\">\
          Created TASK-{:04}: {}</div>",
@@ -1006,19 +1410,78 @@ async fn edit_task_action(
         .ok_or_else(|| DashboardError(format!("task {id} not found")))?;
     task.title = form.title;
     task.description = form.description;
-    task.acceptance_criteria = form
+    let raw_criteria: Vec<String> = form
         .acceptance_criteria
         .lines()
         .map(|l| l.trim().to_string())
         .filter(|l| !l.is_empty())
         .collect();
+    // Enrich criteria with verification tags if not already tagged
+    task.acceptance_criteria = thrum_core::verification::enrich_criteria(&raw_criteria);
+    let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+    task.tagged_criteria = audit.tagged_criteria;
     task.updated_at = Utc::now();
     store.update(&task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
     Ok(Html(format!(
         "<div class=\"action-result success\">TASK-{id:04} updated</div>"
     )))
 }
 
+#[derive(Deserialize)]
+struct UpdateSpecForm { // form body accepted by `update_spec_action`
+    spec_toml: String, // raw TOML text; a blank value clears the task's spec
+}
+
+/// Set, replace, or (on blank input) clear a task's structured spec from TOML form input.
+async fn update_spec_action(
+    State(state): State<Arc<ApiState>>,
+    Path(id): Path<i64>,
+    Form(form): Form<UpdateSpecForm>,
+) -> Result<Html<String>, DashboardError> {
+    let db = state.db();
+    let store = TaskStore::new(db);
+    let mut task = store
+        .get(&TaskId(id))?
+        .ok_or_else(|| DashboardError(format!("task {id} not found")))?;
+
+    if form.spec_toml.trim().is_empty() {
+        // Blank submission clears the spec.
+        task.spec = None;
+        task.updated_at = Utc::now();
+        store.update(&task)?;
+        state.event_bus.emit(EventKind::TaskDataChanged); // same notification the other task actions emit
+        return Ok(Html(format!(
+            "<div class=\"action-result success\">Spec cleared for TASK-{id:04}</div>"
+        )));
+    }
+
+    match thrum_core::spec::Spec::from_toml(&form.spec_toml) {
+        Ok(spec) => {
+            // Update acceptance criteria from spec
+            task.acceptance_criteria = spec.tagged_acceptance_criteria();
+            let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+            task.tagged_criteria = audit.tagged_criteria;
+            // Use first spec requirement as requirement_id if not already set
+            if task.requirement_id.is_none() {
+                task.requirement_id = spec.requirements.first().map(|r| r.id.clone());
+            }
+
+            task.spec = Some(spec);
+            task.updated_at = Utc::now();
+            store.update(&task)?;
+            state.event_bus.emit(EventKind::TaskDataChanged); // emitted only after the update succeeds
+            Ok(Html(format!(
+                "<div class=\"action-result success\">Spec updated for TASK-{id:04}</div>"
+            )))
+        }
+        Err(e) => Ok(Html(format!(
+            "<div class=\"action-result error\">Invalid TOML: {}</div>",
+            escape_html(&e.to_string())
+        ))),
+    }
+}
+
 #[derive(Deserialize)]
 struct SetStatusForm {
     status: String,
@@ -1053,6 +1516,7 @@ async fn set_status_action(
     };
     task.updated_at = Utc::now();
     store.update(&task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
     Ok(Html(format!(
         "<div class=\"action-result success\">\
          TASK-{id:04} status set to {}</div>",
@@ -1068,6 +1532,7 @@ async fn delete_task_action(
     let store = TaskStore::new(db);
     let existed = store.delete(&TaskId(id))?;
     if existed {
+        state.event_bus.emit(EventKind::TaskDataChanged);
         Ok(Html(format!(
             "<div class=\"action-result success\">TASK-{id:04} deleted</div>"
         )))
@@ -1092,6 +1557,7 @@ async fn retry_task_action(
     task.status = TaskStatus::Pending;
     task.updated_at = Utc::now();
     store.update(&task)?;
+    state.event_bus.emit(EventKind::TaskDataChanged);
     Ok(Html(format!(
         "<div class=\"action-result success\">\
          TASK-{id:04} reset for retry (was {old_status}, retries cleared)</div>"
@@ -1112,11 +1578,23 @@ async fn bulk_approve_action(
     let store = TaskStore::new(db);
     let mut approved = 0u32;
     let mut skipped = 0u32;
+    let mut high_risk_blocked = Vec::new();
     for id_str in &form.task_ids {
         if let Ok(id) = id_str.parse::<i64>() {
             if let Ok(Some(mut task)) = store.get(&TaskId(id))
                 && task.status.needs_human()
             {
+                // Block high-risk tasks from bulk approval — require individual review
+                if let TaskStatus::AwaitingApproval { ref summary } = task.status
+                    && summary
+                        .trust_assessment
+                        .as_ref()
+                        .is_some_and(|ta| ta.requires_human_review)
+                {
+                    high_risk_blocked.push(format!("TASK-{id:04}"));
+                    skipped += 1;
+                    continue;
+                }
                 task.status = TaskStatus::Approved;
                 task.updated_at = Utc::now();
                 if store.update(&task).is_ok() {
@@ -1127,9 +1605,18 @@ async fn bulk_approve_action(
             skipped += 1;
         }
     }
+    let mut msg = format!("Approved {approved} tasks, skipped {skipped}");
+    if !high_risk_blocked.is_empty() {
+        msg.push_str(&format!(
+            ". &#x26a0; {} blocked (high-risk files — review individually)",
+            high_risk_blocked.join(", ")
+        ));
+    }
+    if approved > 0 {
+        state.event_bus.emit(EventKind::TaskDataChanged);
+    }
     Ok(Html(format!(
-        "<div class=\"action-result success\">\
-         Approved {approved} tasks, skipped {skipped}</div>"
+        "<div class=\"action-result success\">{msg}</div>"
     )))
 }
 
@@ -1158,6 +1645,7 @@ async fn clear_memory_action(
     } else {
         format!("repo '{}'", escape_html(&form.repo))
     };
+    state.event_bus.emit(EventKind::MemoryUpdated);
     Ok(Html(format!(
         "<div class=\"action-result success\">\
          Cleared {count} memory entries for {scope}</div>"
@@ -1179,6 +1667,7 @@ async fn decay_memory_action(
     let half_life: f64 = form.half_life_hours.parse().unwrap_or(168.0);
     let decayed = store.decay_all(half_life)?;
     let pruned = store.prune_below(0.05)?;
+    state.event_bus.emit(EventKind::MemoryUpdated);
     Ok(Html(format!(
         "<div class=\"action-result success\">\
          Decayed {decayed} entries (half-life {half_life:.0}h), pruned {pruned} below threshold</div>"
@@ -1215,6 +1704,9 @@ async fn update_budget_action(
         changes.push(format!("reset ${old_spent:.2} spent"));
     }
     budget_store.save(&tracker)?;
+    if !changes.is_empty() {
+        state.event_bus.emit(EventKind::BudgetUpdated);
+    }
     let msg = if changes.is_empty() {
         "No changes made".to_string()
     } else {
@@ -1315,9 +1807,407 @@ async fn a2a_send_action(
     )))
 }
 
+// ─── Traceability V-Model ─────────────────────────────────────────────
+
+/// Renders a REQ→DESIGN→IMPL→TEST→PROOF→REVIEW chain per requirement, plus a traceability matrix.
+async fn traceability_partial(
+    State(state): State<Arc<ApiState>>,
+) -> Result<Html<String>, DashboardError> {
+    let db = state.db();
+    let trace_store = thrum_db::trace_store::TraceStore::new(db);
+    let all_records = trace_store.list_all(None, None)?;
+
+    let mut html = String::with_capacity(4096);
+
+    if all_records.is_empty() {
+        html.push_str(
+            "<div class=\"empty\">No traceability records yet. \
+             Records are created as tasks move through the pipeline.</div>",
+        );
+        return Ok(Html(html));
+    }
+
+    // Build the matrix up front; it is only rendered as the table at the end.
+    let matrix = thrum_core::traceability::TraceabilityMatrix::from_records(&all_records);
+
+    // V-model container, heading, and icon legend
+    html.push_str(
+        "<div class=\"vmodel-container\">\
+         <div class=\"vmodel-header\">\
+         <h4>V-Model Traceability Chain</h4>\
+         <div class=\"vmodel-legend\">\
+         <span class=\"vmodel-legend-item vmodel-done\">&#x25cf; Done</span>\
+         <span class=\"vmodel-legend-item vmodel-pass\">&#x2714; Passed</span>\
+         <span class=\"vmodel-legend-item vmodel-fail\">&#x2718; Failed</span>\
+         <span class=\"vmodel-legend-item vmodel-pending\">&#x25cb; Pending</span>\
+         </div></div>",
+    );
+
+    // Group records by requirement ID
+    let mut by_req: std::collections::HashMap<String, Vec<&thrum_core::traceability::TraceRecord>> =
+        std::collections::HashMap::new();
+    for r in &all_records {
+        by_req.entry(r.requirement_id.clone()).or_default().push(r);
+    }
+
+    // One V-model row per requirement, in sorted ID order
+    let mut req_ids: Vec<_> = by_req.keys().cloned().collect();
+    req_ids.sort();
+
+    for req_id in &req_ids {
+        let records = &by_req[req_id];
+        let req_esc = escape_html(req_id);
+
+        // REQ/DESIGN/IMPL are presence flags; TEST/PROOF/REVIEW use the first matching record
+        let has_req = records.iter().any(|r| {
+            matches!(
+                r.artifact,
+                thrum_core::traceability::TraceArtifact::Requirement { .. }
+            )
+        });
+        let has_design = records.iter().any(|r| {
+            matches!(
+                r.artifact,
+                thrum_core::traceability::TraceArtifact::Design { .. }
+            )
+        });
+        let has_impl = records.iter().any(|r| {
+            matches!(
+                r.artifact,
+                thrum_core::traceability::TraceArtifact::Implementation { .. }
+            )
+        });
+        let test_status = records.iter().find_map(|r| {
+            if let thrum_core::traceability::TraceArtifact::Test { passed, .. } = &r.artifact {
+                Some(*passed)
+            } else {
+                None
+            }
+        });
+        let proof_status = records.iter().find_map(|r| {
+            if let thrum_core::traceability::TraceArtifact::Proof { passed, .. } = &r.artifact {
+                Some(*passed)
+            } else {
+                None
+            }
+        });
+        let review_status = records.iter().find_map(|r| {
+            if let thrum_core::traceability::TraceArtifact::Review { approved, .. } = &r.artifact {
+                Some(*approved)
+            } else {
+                None
+            }
+        });
+
+        let _ = write!(
+            html,
+            "<div class=\"vmodel-row\">\
+             <div class=\"vmodel-req-id\">{req_esc}</div>\
+             <div class=\"vmodel-chain\">",
+        );
+
+        // Chain steps: Some(true) = done/passed, Some(false) = failed, None = pending
+        let steps: &[(&str, Option<bool>)] = &[
+            ("REQ", if has_req { Some(true) } else { None }),
+            ("DESIGN", if has_design { Some(true) } else { None }),
+            ("IMPL", if has_impl { Some(true) } else { None }),
+            ("TEST", test_status),
+            ("PROOF", proof_status),
+            ("REVIEW", review_status),
+        ];
+
+        for (i, (label, status)) in steps.iter().enumerate() {
+            let (class, icon) = match status {
+                Some(true) => ("vmodel-step done", "&#x2714;"),
+                Some(false) => ("vmodel-step failed", "&#x2718;"),
+                None => ("vmodel-step pending", "&#x25cb;"),
+            };
+            let _ = write!(html, "<span class=\"{class}\">{icon} {label}</span>");
+            if i < steps.len() - 1 {
+                html.push_str("<span class=\"vmodel-arrow\">&#x2192;</span>");
+            }
+        }
+
+        html.push_str("</div></div>");
+    }
+
+    html.push_str("</div>");
+
+    // Traceability matrix table (design text cut to 40 chars, commit to 8; em dash if absent)
+    if !matrix.entries.is_empty() {
+        html.push_str(
+            "<h4 style=\"margin-top:16px;\">Traceability Matrix</h4>\
+             <table class=\"task-table\">\
+             <thead><tr>\
+             <th>Requirement</th><th>Design</th><th>Implementation</th>\
+             <th>Test</th><th>Proof</th><th>Review</th>\
+             </tr></thead><tbody>",
+        );
+
+        for entry in &matrix.entries {
+            let req_esc = escape_html(&entry.requirement_id);
+            let design = entry
+                .design
+                .as_deref()
+                .map(|d| {
+                    let truncated: String = d.chars().take(40).collect();
+                    escape_html(&truncated)
+                })
+                .unwrap_or_else(|| "\u{2014}".to_string());
+            let impl_val = entry
+                .implementation_commit
+                .as_deref()
+                .map(|c| {
+                    let short: String = c.chars().take(8).collect();
+                    escape_html(&short)
+                })
+                .unwrap_or_else(|| "\u{2014}".to_string());
+            let test_val = entry
+                .test_status
+                .map(|b| if b { "&#x2714;" } else { "&#x2718;" })
+                .unwrap_or("\u{2014}");
+            let proof_val = entry
+                .proof_status
+                .map(|b| if b { "&#x2714;" } else { "&#x2718;" })
+                .unwrap_or("\u{2014}");
+            let review_val = entry
+                .review_status
+                .map(|b| if b { "&#x2714;" } else { "&#x2718;" })
+                .unwrap_or("\u{2014}");
+
+            let _ = write!(
+                html,
+                "<tr>\
+                 <td class=\"task-id\">{req_esc}</td>\
+                 <td>{design}</td>\
+                 <td><code>{impl_val}</code></td>\
+                 <td>{test_val}</td>\
+                 <td>{proof_val}</td>\
+                 <td>{review_val}</td>\
+                 </tr>",
+            );
+        }
+        html.push_str("</tbody></table>");
+    }
+
+    Ok(Html(html))
+}
+
+// ─── JSON API Endpoints ──────────────────────────────────────────────────
+
+#[derive(serde::Serialize)]
+struct StatusJson {
+    pending: u32, // tasks in Pending
+    active: u32, // tasks currently moving through the pipeline
+    approval: u32, // tasks in AwaitingApproval
+    merged: u32, // tasks in Merged
+    failed: u32, // tasks in any remaining status except Approved
+}
+
+/// Count tasks per coarse pipeline bucket and return the totals as JSON.
+///
+/// Tasks waiting on CI are in flight, so they count as `active`, not `failed`.
+async fn status_json(
+    State(state): State<Arc<ApiState>>,
+) -> Result<Json<StatusJson>, DashboardError> {
+    let store = TaskStore::new(state.db());
+    let mut counts = StatusJson {
+        pending: 0,
+        active: 0,
+        approval: 0,
+        merged: 0,
+        failed: 0,
+    };
+    for task in &store.list(None, None)? {
+        match &task.status {
+            TaskStatus::Pending => counts.pending += 1,
+            // AwaitingCI is in flight (PR open, waiting on CI), so it belongs here.
+            TaskStatus::Claimed { .. }
+            | TaskStatus::Implementing { .. }
+            | TaskStatus::Reviewing { .. }
+            | TaskStatus::Integrating
+            | TaskStatus::AwaitingCI { .. } => counts.active += 1,
+            TaskStatus::AwaitingApproval { .. } => counts.approval += 1,
+            TaskStatus::Merged { .. } => counts.merged += 1,
+            // Approved is deliberately uncounted; what remains is failed or rejected.
+            TaskStatus::Approved => {}
+            _ => counts.failed += 1,
+        }
+    }
+    Ok(Json(counts))
+}
+
+#[derive(serde::Serialize)]
+struct BudgetJson {
+    spent: f64, // tracker.total_spent()
+    ceiling: f64, // tracker.ceiling_usd
+    remaining: f64, // ceiling - spent; not clamped, so negative once overspent
+}
+
+/// JSON budget summary: total spent, ceiling, and `ceiling - spent` (negative once overspent).
+async fn budget_json(
+    State(state): State<Arc<ApiState>>,
+) -> Result<Json<BudgetJson>, DashboardError> {
+    // Nothing saved yet: fall back to a fresh `BudgetTracker::new(1000.0)`.
+    let tracker = match BudgetStore::new(state.db()).load()? {
+        Some(saved) => saved,
+        None => thrum_core::budget::BudgetTracker::new(1000.0),
+    };
+    let (ceiling, spent) = (tracker.ceiling_usd, tracker.total_spent());
+    Ok(Json(BudgetJson {
+        spent,
+        ceiling,
+        remaining: ceiling - spent,
+    }))
+}
+
 // ─── Helpers ────────────────────────────────────────────────────────────
 
+/// Timeline stages as `(abbreviation, name, description, help anchor)`; array index = stage number.
+const PIPELINE_STEPS: [(&str, &str, &str, &str); 9] = [
+    (
+        "P",
+        "Pending",
+        "Task is queued and waiting for an agent to pick it up.",
+        "pending",
+    ),
+    (
+        "I",
+        "Implementing",
+        "An agent is actively writing code for this task.",
+        "implementing",
+    ),
+    (
+        "G1",
+        "Gate 1: Quality",
+        "Automated quality checks: cargo fmt, clippy, and tests.",
+        "gate1",
+    ),
+    (
+        "R",
+        "Reviewing",
+        "AI reviewer is analyzing the implementation for correctness.",
+        "reviewing",
+    ),
+    (
+        "G2",
+        "Gate 2: Proof",
+        "Formal verification checks: Z3 and Rocq proofs.",
+        "gate2",
+    ),
+    (
+        "A",
+        "Awaiting Approval",
+        "Implementation passed gates; waiting for human approval.",
+        "approval",
+    ),
+    (
+        "Int",
+        "Integrating",
+        "Merging changes into the target branch.",
+        "integrating",
+    ),
+    (
+        "CI",
+        "Awaiting CI",
+        "PR created; waiting for CI pipeline to pass.",
+        "ci",
+    ),
+    (
+        "M",
+        "Merged",
+        "Task is complete and merged into the main branch.",
+        "merged",
+    ),
+];
+
+/// Build a tooltip string for the task status badge.
+///
+/// Gate-failed statuses list their failing check names so users can see WHY the gate
+/// failed without expanding the row; all other statuses use `status_tooltip_static`.
+fn status_tooltip_string(status: &TaskStatus) -> String {
+    match status {
+        TaskStatus::Gate1Failed { report } => {
+            let failed: Vec<&str> = report
+                .checks
+                .iter()
+                .filter(|c| !c.passed)
+                .map(|c| c.name.as_str())
+                .collect();
+            if failed.is_empty() {
+                "Quality checks failed (fmt/clippy/test).".to_string()
+            } else {
+                format!("Gate 1 failed: {}", failed.join(", "))
+            }
+        }
+        TaskStatus::Gate2Failed { report } => {
+            let failed: Vec<&str> = report
+                .checks
+                .iter()
+                .filter(|c| !c.passed)
+                .map(|c| c.name.as_str())
+                .collect();
+            if failed.is_empty() {
+                "Proof checks failed (Z3/Rocq).".to_string()
+            } else {
+                format!("Gate 2 failed: {}", failed.join(", "))
+            }
+        }
+        TaskStatus::Gate3Failed { report } => {
+            let failed: Vec<&str> = report
+                .checks
+                .iter()
+                .filter(|c| !c.passed)
+                .map(|c| c.name.as_str())
+                .collect();
+            if failed.is_empty() {
+                "Integration checks failed.".to_string()
+            } else {
+                format!("Gate 3 failed: {}", failed.join(", "))
+            }
+        }
+        other => status_tooltip_static(other).to_string(),
+    }
+}
+
+/// Status badge tooltip text: explains the current state and what happens next.
+fn status_tooltip_static(status: &TaskStatus) -> &'static str {
+    match status {
+        TaskStatus::Pending => "Queued for processing. An agent will claim this task next.",
+        TaskStatus::Claimed { .. } => "An agent has claimed this task and will begin shortly.",
+        TaskStatus::Implementing { .. } => "Agent is writing code. Next: Gate 1 quality checks.",
+        TaskStatus::Gate1Failed { .. } => {
+            "Quality checks failed (fmt/clippy/test). Task returns to Implementing for retry."
+        }
+        TaskStatus::Reviewing { .. } => {
+            "AI reviewer is checking the code. Next: Gate 2 proof checks."
+        }
+        TaskStatus::Gate2Failed { .. } => {
+            "Proof checks failed (Z3/Rocq). Task returns to Implementing for retry."
+        }
+        TaskStatus::AwaitingApproval { .. } => {
+            "All gates passed. A human must approve or reject this task."
+        }
+        TaskStatus::Approved => "Human approved. Task will be integrated into the target branch.",
+        TaskStatus::Rejected { .. } => {
+            "Human rejected. Task returns to Implementing with feedback."
+        }
+        TaskStatus::Integrating => {
+            "Merging changes into the target branch. Next: push and create PR."
+        }
+        TaskStatus::Gate3Failed { .. } => {
+            "Integration failed. Task returns to Implementing for retry."
+        }
+        TaskStatus::AwaitingCI { .. } => "PR created and pushed. Waiting for CI pipeline to pass.",
+        TaskStatus::CIFailed { .. } => "CI pipeline failed. Needs human review or retry.",
+        TaskStatus::Merged { .. } => "Task is complete. Changes are merged into main.",
+    }
+}
+
 /// Render an inline timeline showing pipeline progress as small step indicators.
+///
+/// Each step has a tooltip with the full stage name and description, and links
+/// to the relevant section of the help page.
 fn render_inline_timeline(status: &TaskStatus) -> String {
     let stage = match status {
         TaskStatus::Pending => 0,
@@ -1331,7 +2221,9 @@ fn render_inline_timeline(status: &TaskStatus) -> String {
         TaskStatus::Rejected { .. } => 5,
         TaskStatus::Integrating => 6,
         TaskStatus::Gate3Failed { .. } => 6,
-        TaskStatus::Merged { .. } => 7,
+        TaskStatus::AwaitingCI { .. } => 7,
+        TaskStatus::CIFailed { .. } => 7,
+        TaskStatus::Merged { .. } => 8,
     };
 
     let is_failed = matches!(
@@ -1339,12 +2231,12 @@ fn render_inline_timeline(status: &TaskStatus) -> String {
         TaskStatus::Gate1Failed { .. }
             | TaskStatus::Gate2Failed { .. }
             | TaskStatus::Gate3Failed { .. }
+            | TaskStatus::CIFailed { .. }
             | TaskStatus::Rejected { .. }
     );
 
-    let steps = ["P", "I", "G1", "R", "G2", "A", "Int", "M"];
-    let mut out = String::with_capacity(256);
-    for (i, &step) in steps.iter().enumerate() {
+    let mut out = String::with_capacity(512);
+    for (i, &(abbr, name, desc, anchor)) in PIPELINE_STEPS.iter().enumerate() {
         let class = if i < stage {
             "timeline-step done"
         } else if i == stage && is_failed {
@@ -1354,7 +2246,11 @@ fn render_inline_timeline(status: &TaskStatus) -> String {
         } else {
             "timeline-step"
         };
-        let _ = write!(out, "<span class=\"{class}\">{step}</span>");
+        let _ = write!(
+            out,
+            "<a href=\"/dashboard/help#{anchor}\" class=\"{class}\" \
+             title=\"{name}: {desc}\">{abbr}</a>",
+        );
     }
     out
 }
@@ -1386,19 +2282,96 @@ fn render_task_row_into(buf: &mut String, task: &thrum_core::task::Task) {
         ""
     };
 
+    // Dependency indicators
+    let dep_info = if task.has_dependencies() {
+        let dep_ids: Vec<String> = task
+            .depends_on
+            .iter()
+            .map(|d| d.prerequisite.to_string())
+            .collect();
+        format!(
+            " <span class=\"badge badge-pending\" title=\"Depends on: {}\">dep:{}</span>",
+            dep_ids.join(", "),
+            task.depends_on.len()
+        )
+    } else {
+        String::new()
+    };
+
+    // Trust badge for tasks with trust assessment
+    let trust_badge = if let TaskStatus::AwaitingApproval { ref summary } = task.status {
+        if let Some(ref ta) = summary.trust_assessment {
+            let cls = risk_css_class(ta.overall_risk);
+            format!(
+                " <span class=\"badge-trust {cls}\">{}</span>",
+                ta.overall_risk
+            )
+        } else {
+            String::new()
+        }
+    } else {
+        String::new()
+    };
+
+    let batch_info = if let Some(ref barrier) = task.batch_barrier {
+        format!(
+            " <span class=\"badge badge-implementing\" title=\"Batch: {}\">B{}</span>",
+            escape_html(&barrier.name),
+            barrier.order
+        )
+    } else {
+        String::new()
+    };
+
+    let badge_tip = escape_html(&status_tooltip_string(&task.status));
     let _ = write!(
         buf,
         "<tr id=\"task-row-{id}\" class=\"task-row\">\
          <td class=\"task-id\">TASK-{id:04}</td>\
          <td>{repo}</td>\
-         <td>{title}</td>\
-         <td><span class=\"badge badge-{label}\">{label}</span></td>\
+         <td>{title}{dep_info}{batch_info}{trust_badge}</td>\
+         <td><span class=\"badge badge-{label}\" title=\"{badge_tip}\">{label}</span></td>\
          <td><div class=\"timeline\">{timeline}</div></td>\
          <td><span class=\"{retry_class}\" title=\"{retries} of {max_retries} retries used\">\
          {retries}/{max_retries}</span></td>\
          <td><div class=\"actions\">",
     );
 
+    // PR link for AwaitingCI and CIFailed tasks
+    match &task.status {
+        TaskStatus::AwaitingCI {
+            pr_number,
+            pr_url,
+            ci_attempts,
+            ..
+        } => {
+            let url = escape_html(pr_url);
+            let _ = write!(
+                buf,
+                "<a href=\"{url}\" target=\"_blank\" rel=\"noopener\" \
+                 class=\"btn btn-pr btn-sm\" \
+                 title=\"PR #{pr_number} (CI attempt {ci_attempts})\">\
+                 PR #{pr_number}</a>",
+            );
+        }
+        TaskStatus::CIFailed {
+            pr_number,
+            pr_url,
+            ci_attempts,
+            ..
+        } => {
+            let url = escape_html(pr_url);
+            let _ = write!(
+                buf,
+                "<a href=\"{url}\" target=\"_blank\" rel=\"noopener\" \
+                 class=\"btn btn-pr btn-sm\" \
+                 title=\"PR #{pr_number} (CI failed after {ci_attempts} attempts)\">\
+                 PR #{pr_number}</a>",
+            );
+        }
+        _ => {}
+    }
+
     // Review link for AwaitingApproval tasks
     if task.status.needs_human() {
         let _ = write!(
@@ -1414,7 +2387,7 @@ fn render_task_row_into(buf: &mut String, task: &thrum_core::task::Task) {
         let _ = write!(
             buf,
             "<button class=\"btn btn-retry btn-sm\" \
-             onclick=\"retryTask({id})\" \
+             onclick=\"retryTask({id}, this)\" \
              title=\"Reset to pending and clear retry count\">\u{21bb} Retry</button>",
         );
     }
@@ -1436,13 +2409,187 @@ fn render_task_row_into(buf: &mut String, task: &thrum_core::task::Task) {
     let _ = write!(
         buf,
         "<button class=\"btn btn-reject btn-sm\" \
-         onclick=\"deleteTask({id})\" \
+         onclick=\"deleteTask({id}, this)\" \
          title=\"Delete TASK-{id:04}\">\u{2715}</button>",
     );
 
     buf.push_str("</div></td></tr>");
 }
 
+/// GET /dashboard/partials/dependencies — cycle warning, predicted file conflicts,
+/// the task dependency table, and batch barrier groups.
+async fn dependencies_partial(
+    State(state): State<Arc<ApiState>>,
+) -> Result<Html<String>, DashboardError> {
+    let db = state.db();
+    let store = TaskStore::new(db);
+    let tasks = store.list(None, None)?;
+
+    let mut html = String::with_capacity(4096);
+
+    if tasks.is_empty() {
+        html.push_str("<div class=\"empty\">No tasks to analyze</div>");
+        return Ok(Html(html));
+    }
+
+    // Build dependency graph (used here only for cycle detection)
+    let graph = thrum_core::dependency::DependencyGraph::from_tasks(&tasks);
+
+    // Check for cycles
+    if let Some(cycle) = graph.find_cycle() {
+        html.push_str("<div class=\"conflict-warning\">");
+        html.push_str("<strong>\u{26a0} Dependency Cycle Detected:</strong> ");
+        let cycle_str: Vec<String> = cycle.iter().map(|id| format!("TASK-{id:04}")).collect();
+        let _ = write!(html, "{}", cycle_str.join(" \u{2192} "));
+        html.push_str("</div>");
+    }
+
+    // Predicted file conflicts (only tasks still in Pending are compared)
+    let pending_tasks: Vec<_> = tasks
+        .iter()
+        .filter(|t| matches!(t.status, TaskStatus::Pending))
+        .cloned()
+        .collect();
+    let conflicts = thrum_core::dependency::predict_conflicts(&pending_tasks);
+
+    if !conflicts.is_empty() {
+        html.push_str("<div class=\"conflict-section\">");
+        html.push_str("<h4>\u{26a0} Predicted File Conflicts</h4>");
+        html.push_str("<table class=\"task-table\"><thead><tr>");
+        html.push_str("<th>File</th><th>Task A</th><th>Task B</th><th>Severity</th>");
+        html.push_str("</tr></thead><tbody>");
+
+        for conflict in &conflicts {
+            let path_esc = escape_html(&conflict.path.display().to_string());
+            let _ = write!(
+                html,
+                "<tr>\
+                 <td><code>{path_esc}</code></td>\
+                 <td>{}</td>\
+                 <td>{}</td>\
+                 <td><span class=\"badge badge-gate1-failed\">{}</span></td>\
+                 </tr>",
+                conflict.task_a, conflict.task_b, conflict.severity,
+            );
+        }
+        html.push_str("</tbody></table></div>");
+    }
+
+    // Tasks in a terminal status count as completed prerequisites / batch members
+    let completed: std::collections::HashSet<i64> = tasks
+        .iter()
+        .filter(|t| t.status.is_terminal())
+        .map(|t| t.id.0)
+        .collect();
+
+    let has_deps: Vec<_> = tasks.iter().filter(|t| !t.depends_on.is_empty()).collect();
+    let batched: Vec<_> = tasks.iter().filter(|t| t.batch_barrier.is_some()).collect();
+    if has_deps.is_empty() && conflicts.is_empty() && batched.is_empty() {
+        html.push_str("<div class=\"empty\">No dependencies or conflicts configured</div>");
+        return Ok(Html(html));
+    }
+
+    if !has_deps.is_empty() {
+        html.push_str("<div class=\"dependency-graph\">");
+        html.push_str("<h4>Task Dependencies</h4>");
+        html.push_str("<table class=\"task-table\"><thead><tr>");
+        html.push_str("<th>Task</th><th>Depends On</th><th>Status</th><th>Ready</th>");
+        html.push_str("</tr></thead><tbody>");
+
+        for task in &has_deps {
+            let deps_str: Vec<String> = task
+                .depends_on
+                .iter()
+                .map(|d| {
+                    let kind = &d.kind;
+                    format!("{} ({})", d.prerequisite, kind)
+                })
+                .collect();
+
+            let is_ready = task.dependencies_satisfied(&completed);
+            let ready_badge = if is_ready {
+                "<span class=\"badge badge-merged\">\u{2713} ready</span>"
+            } else {
+                "<span class=\"badge badge-pending\">\u{23f3} blocked</span>"
+            };
+
+            let title_esc = escape_html(&task.title);
+            let _ = write!(
+                html,
+                "<tr>\
+                 <td><strong>{}</strong><br><small>{title_esc}</small></td>\
+                 <td>{}</td>\
+                 <td><span class=\"badge badge-{}\">{}</span></td>\
+                 <td>{ready_badge}</td>\
+                 </tr>",
+                task.id,
+                deps_str.join(", "),
+                task.status.label(),
+                task.status.label(),
+            );
+        }
+
+        html.push_str("</tbody></table></div>");
+    }
+
+    // Batch barrier visualization. `batched` is collected before the empty-state
+    // check above so that tasks with only a batch barrier are still rendered.
+
+    if !batched.is_empty() {
+        html.push_str("<div class=\"batch-section\">");
+        html.push_str("<h4>Batch Barriers</h4>");
+
+        let mut batches: std::collections::HashMap<u32, Vec<&thrum_core::task::Task>> =
+            std::collections::HashMap::new();
+        for task in &batched {
+            if let Some(ref barrier) = task.batch_barrier {
+                batches.entry(barrier.order).or_default().push(task);
+            }
+        }
+
+        let mut orders: Vec<u32> = batches.keys().copied().collect();
+        orders.sort();
+
+        for order in orders {
+            let batch_tasks = &batches[&order];
+            let batch_name = batch_tasks
+                .first()
+                .and_then(|t| t.batch_barrier.as_ref())
+                .map(|b| b.name.as_str())
+                .unwrap_or("unnamed");
+
+            let all_done = batch_tasks.iter().all(|t| completed.contains(&t.id.0));
+            let status = if all_done {
+                "\u{2705} complete"
+            } else {
+                "\u{23f3} in progress"
+            };
+
+            let _ = write!(
+                html,
+                "<div class=\"batch-group\">\
+                 <strong>Batch {order}: {}</strong> — {status}<br>",
+                escape_html(batch_name),
+            );
+
+            for task in batch_tasks {
+                let _ = write!(
+                    html,
+                    "<span class=\"badge badge-{}\" style=\"margin:2px\">{}: {}</span> ",
+                    task.status.label(),
+                    task.id,
+                    escape_html(&task.title),
+                );
+            }
+
+            html.push_str("</div>");
+        }
+        html.push_str("</div>");
+    }
+
+    Ok(Html(html))
+}
+
 /// Minimal HTML escaping for dynamic content.
 fn escape_html(s: &str) -> String {
     s.replace('&', "&amp;")
diff --git a/crates/thrum-api/src/lib.rs b/crates/thrum-api/src/lib.rs
index 9b83f65..0e92484 100644
--- a/crates/thrum-api/src/lib.rs
+++ b/crates/thrum-api/src/lib.rs
@@ -4,9 +4,9 @@
 //! Built with axum for async HTTP serving.
 //! Includes an embedded HTMX-powered dashboard at `/dashboard`.
 
-mod a2a;
 mod dashboard;
 mod sse;
+mod ws;
 
 use axum::{
     Json, Router,
@@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 use std::sync::Arc;
 use thrum_core::repo::ReposConfig;
-use thrum_core::task::{RepoName, Task, TaskId, TaskStatus};
+use thrum_core::task::{GateReport, RepoName, Task, TaskId, TaskStatus};
 use thrum_core::telemetry::{TraceFilter, TraceReader};
 use thrum_db::task_store::TaskStore;
 use thrum_runner::event_bus::EventBus;
@@ -120,14 +120,23 @@ pub fn api_router(state: Arc<ApiState>) -> Router {
         .route("/api/v1/tasks/{id}/diff", get(get_task_diff))
         .route("/api/v1/tasks/{id}/approve", post(approve_task))
         .route("/api/v1/tasks/{id}/reject", post(reject_task))
+        .route(
+            "/api/v1/tasks/{id}/spec",
+            get(get_task_spec).post(set_task_spec),
+        )
+        .route(
+            "/api/v1/tasks/{id}/dependencies",
+            post(add_dependency).get(get_dependencies),
+        )
+        .route("/api/v1/dependencies/graph", get(dependency_graph))
         .route("/api/v1/traces", get(list_traces))
-        // SSE event stream
+        .route("/api/v1/traces/records", get(list_trace_records))
+        .route("/api/v1/traces/matrix", get(trace_matrix))
+        .route("/api/v1/sync", post(trigger_sync))
+        // SSE event stream (kept for backwards compatibility)
         .route("/api/v1/events/stream", get(sse::event_stream))
-        // A2A protocol endpoints
-        .route("/.well-known/agent.json", get(a2a::agent_card))
-        .route("/a2a", post(a2a::jsonrpc_handler))
-        .route("/a2a/stream", post(a2a::streaming_handler))
-        .route("/a2a/subscribe/{task_id}", get(a2a::subscribe_handler))
+        // WebSocket endpoint for bidirectional communication
+        .route("/ws", get(ws::ws_handler))
         // Embedded web dashboard
         .merge(dashboard::dashboard_router())
         .layer(TraceLayer::new_for_http())
@@ -288,12 +297,72 @@ struct TaskResponse {
     retry_count: u32,
     requirement_id: Option<String>,
     acceptance_criteria: Vec<String>,
+    tagged_criteria: Vec<thrum_core::verification::TaggedCriterion>,
+    /// Structured verification report aggregating per-criterion results.
+    /// `None` if no tagged criteria are present.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    verification_report: Option<thrum_core::verification::VerificationReport>,
+    /// Task dependency ordering — IDs of tasks that must finish first.
+    depends_on: Vec<DependencyInfo>,
+    /// Batch barrier for grouping tasks into ordered waves.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    batch_barrier: Option<BatchBarrierInfo>,
+    /// Full gate report when the task is in a gate-failed state.
+    /// Includes check names, stdout, stderr, and exit codes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    gate_report: Option<GateReport>,
+    /// Names of the failing checks (convenience for tooltips and summaries).
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    failing_checks: Vec<String>,
+    /// Historical gate reports from previous retry cycles, oldest first.
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    gate_history: Vec<GateReport>,
     created_at: String,
     updated_at: String,
 }
 
+#[derive(Serialize)]
+struct DependencyInfo {
+    prerequisite: i64, // inner numeric ID of the prerequisite task
+    kind: String, // the dependency kind rendered with to_string()
+}
+
+#[derive(Serialize)]
+struct BatchBarrierInfo {
+    name: String, // copied from the task's batch barrier name
+    order: u32, // copied from the task's batch barrier order
+}
+
 impl From<Task> for TaskResponse {
     fn from(t: Task) -> Self {
+        let verification_report = if t.tagged_criteria.is_empty() {
+            None
+        } else {
+            Some(thrum_core::verification::VerificationReport::from_criteria(
+                t.id.0,
+                &t.tagged_criteria,
+            ))
+        };
+        let depends_on = t
+            .depends_on
+            .iter()
+            .map(|d| DependencyInfo {
+                prerequisite: d.prerequisite.0,
+                kind: d.kind.to_string(),
+            })
+            .collect();
+        let batch_barrier = t.batch_barrier.as_ref().map(|b| BatchBarrierInfo {
+            name: b.name.clone(),
+            order: b.order,
+        });
+        let gate_report = t.status.gate_report().cloned();
+        let failing_checks = t
+            .status
+            .failing_check_names()
+            .into_iter()
+            .map(String::from)
+            .collect();
+        let gate_history = t.gate_history.clone();
         Self {
             id: t.id.0,
             repo: t.repo.to_string(),
@@ -303,6 +372,13 @@ impl From<Task> for TaskResponse {
             retry_count: t.retry_count,
             requirement_id: t.requirement_id,
             acceptance_criteria: t.acceptance_criteria,
+            tagged_criteria: t.tagged_criteria,
+            verification_report,
+            depends_on,
+            batch_barrier,
+            gate_report,
+            failing_checks,
+            gate_history,
             created_at: t.created_at.to_rfc3339(),
             updated_at: t.updated_at.to_rfc3339(),
         }
@@ -352,12 +428,26 @@ struct CreateTaskRequest {
     requirement_id: Option<String>,
     #[serde(default)]
     acceptance_criteria: Vec<String>,
+    /// Task IDs this task depends on (hard dependencies by default).
+    #[serde(default)]
+    depends_on: Vec<i64>,
+    /// Optional batch barrier for grouping tasks.
+    #[serde(default)]
+    batch_barrier: Option<CreateBatchBarrier>,
+}
+
+#[derive(Deserialize)]
+struct CreateBatchBarrier {
+    name: String,
+    order: u32,
 }
 
 async fn create_task(
     State(state): State<Arc<ApiState>>,
     Json(req): Json<CreateTaskRequest>,
 ) -> Result<(StatusCode, Json<TaskResponse>), AppError> {
+    use thrum_core::dependency::{BatchBarrier, TaskDependency};
+
     let repo_name = RepoName::new(&req.repo);
 
     let db = state.db();
@@ -365,7 +455,22 @@ async fn create_task(
 
     let mut task = Task::new(repo_name, req.title, req.description);
     task.requirement_id = req.requirement_id;
-    task.acceptance_criteria = req.acceptance_criteria;
+    // Enrich criteria with verification tags if not already tagged
+    task.acceptance_criteria = thrum_core::verification::enrich_criteria(&req.acceptance_criteria);
+    let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+    task.tagged_criteria = audit.tagged_criteria;
+
+    // Set up dependencies
+    task.depends_on = req
+        .depends_on
+        .iter()
+        .map(|&id| TaskDependency::hard(TaskId(id)))
+        .collect();
+
+    // Set up batch barrier
+    if let Some(bb) = req.batch_barrier {
+        task.batch_barrier = Some(BatchBarrier::new(bb.name, bb.order));
+    }
 
     let task = store.insert(task)?;
     Ok((StatusCode::CREATED, Json(TaskResponse::from(task))))
@@ -423,6 +528,252 @@ async fn reject_task(
     Ok(Json(TaskResponse::from(task)))
 }
 
+// ─── Spec ────────────────────────────────────────────────────────────────
+
+/// GET /api/v1/tasks/{id}/spec — view a task's structured spec.
+///
+/// Responds with the stored spec as JSON (`null` when none has been set), or
+/// with a not-found error when no task has the given id.
+async fn get_task_spec(
+    State(state): State<Arc<ApiState>>,
+    Path(id): Path<i64>,
+) -> Result<Json<Option<thrum_core::spec::Spec>>, AppError> {
+    let store = TaskStore::new(state.db());
+    let missing = || AppError::not_found(format!("task {id} not found"));
+    Ok(Json(store.get(&TaskId(id))?.ok_or_else(missing)?.spec))
+}
+
+#[derive(Deserialize)]
+struct SetSpecRequest {
+    spec: thrum_core::spec::Spec,
+}
+
+/// POST /api/v1/tasks/{id}/spec — set a task's structured spec.
+///
+/// Replaces the task's acceptance criteria with the spec's tagged criteria,
+/// re-audits them, and stores the spec. Unknown ids yield a not-found error.
+async fn set_task_spec(
+    State(state): State<Arc<ApiState>>,
+    Path(id): Path<i64>,
+    Json(req): Json<SetSpecRequest>,
+) -> Result<Json<TaskResponse>, AppError> {
+    let store = TaskStore::new(state.db());
+    let mut task = store
+        .get(&TaskId(id))?
+        .ok_or_else(|| AppError::not_found(format!("task {id} not found")))?;
+
+    task.acceptance_criteria = req.spec.tagged_acceptance_criteria();
+    let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+    task.tagged_criteria = audit.tagged_criteria;
+    // Keep an already-assigned requirement id; otherwise adopt the spec's first.
+    if task.requirement_id.is_none() {
+        task.requirement_id = req.spec.requirements.first().map(|r| r.id.clone());
+    }
+    task.spec = Some(req.spec);
+    task.updated_at = Utc::now();
+    store.update(&task)?;
+    Ok(Json(TaskResponse::from(task)))
+}
+
+// ─── Dependencies ────────────────────────────────────────────────────────
+
+#[derive(Deserialize)]
+struct AddDependencyRequest {
+    prerequisite_id: i64,
+    #[serde(default = "default_dep_kind")]
+    kind: String,
+}
+
+fn default_dep_kind() -> String {
+    "hard".into()
+}
+
+/// POST /api/v1/tasks/{id}/dependencies — add a dependency.
+///
+/// A missing task or prerequisite yields a not-found error. Self-dependencies
+/// and edges that would create a cycle are rejected; a prerequisite that is
+/// already recorded is a no-op. Any `kind` other than "soft" means must-finish-before.
+async fn add_dependency(
+    State(state): State<Arc<ApiState>>,
+    Path(id): Path<i64>,
+    Json(req): Json<AddDependencyRequest>,
+) -> Result<Json<TaskResponse>, AppError> {
+    use thrum_core::dependency::{DependencyGraph, DependencyKind, TaskDependency};
+
+    let store = TaskStore::new(state.db());
+    let prerequisite_id = req.prerequisite_id;
+
+    let mut task = store
+        .get(&TaskId(id))?
+        .ok_or_else(|| AppError::not_found(format!("task {id} not found")))?;
+
+    // Prevent self-dependency (checked first: it needs no further lookup)
+    if prerequisite_id == id {
+        return Err(AppError::internal("task cannot depend on itself"));
+    }
+
+    // Verify the prerequisite task exists
+    store.get(&TaskId(prerequisite_id))?.ok_or_else(|| {
+        AppError::not_found(format!("prerequisite task {prerequisite_id} not found"))
+    })?;
+
+    let kind = match req.kind.as_str() {
+        "soft" => DependencyKind::SoftOrder,
+        _ => DependencyKind::MustFinishBefore,
+    };
+
+    // An already-recorded prerequisite is returned unchanged, without a write.
+    let already_exists = task
+        .depends_on
+        .iter()
+        .any(|d| d.prerequisite.0 == prerequisite_id);
+    if already_exists {
+        return Ok(Json(TaskResponse::from(task)));
+    }
+
+    task.depends_on.push(TaskDependency {
+        prerequisite: TaskId(prerequisite_id),
+        kind,
+    });
+
+    // Check for cycles with the updated task substituted into the stored list;
+    // the store is only written after this check passes.
+    let mut check_tasks = store.list(None, None)?;
+    if let Some(slot) = check_tasks.iter_mut().find(|t| t.id.0 == id) {
+        *slot = task.clone();
+    }
+    if DependencyGraph::from_tasks(&check_tasks).find_cycle().is_some() {
+        return Err(AppError::internal(format!(
+            "adding dependency on TASK-{prerequisite_id:04} would create a cycle"
+        )));
+    }
+
+    task.updated_at = Utc::now();
+    store.update(&task)?;
+
+    Ok(Json(TaskResponse::from(task)))
+}
+
+/// GET /api/v1/tasks/{id}/dependencies — get dependencies for a task.
+///
+/// Returns one entry per recorded dependency (prerequisite id plus the kind's
+/// string form), or a not-found error when no task has the given id.
+async fn get_dependencies(
+    State(state): State<Arc<ApiState>>,
+    Path(id): Path<i64>,
+) -> Result<Json<Vec<DependencyInfo>>, AppError> {
+    let store = TaskStore::new(state.db());
+    let task = store
+        .get(&TaskId(id))?
+        .ok_or_else(|| AppError::not_found(format!("task {id} not found")))?;
+
+    let deps: Vec<DependencyInfo> = task
+        .depends_on
+        .iter()
+        .map(|d| DependencyInfo {
+            prerequisite: d.prerequisite.0,
+            kind: d.kind.to_string(),
+        })
+        .collect();
+    Ok(Json(deps))
+}
+
+/// Response body of GET /api/v1/dependencies/graph — full dependency graph with ordering info.
+#[derive(Serialize)]
+struct DependencyGraphResponse {
+    nodes: Vec<DependencyNodeInfo>,
+    edges: Vec<DependencyEdgeInfo>,
+    topological_order: Vec<i64>,
+    has_cycle: bool,
+    predicted_conflicts: Vec<PredictedConflictInfo>,
+}
+
+#[derive(Serialize)]
+struct DependencyNodeInfo {
+    id: i64,
+    title: String,
+    status: String,
+    is_ready: bool,
+}
+
+#[derive(Serialize)]
+struct DependencyEdgeInfo {
+    from: i64,
+    to: i64,
+    kind: String,
+}
+
+#[derive(Serialize)]
+struct PredictedConflictInfo {
+    task_a: i64,
+    task_b: i64,
+    path: String,
+    severity: String,
+}
+
+/// Builds the full dependency graph: one node per task, one edge per
+/// dependency (prerequisite → dependent), the topological order, a cycle
+/// flag, and predicted conflicts among pending tasks.
+async fn dependency_graph(
+    State(state): State<Arc<ApiState>>,
+) -> Result<Json<DependencyGraphResponse>, AppError> {
+    use thrum_core::dependency::{DependencyGraph, predict_conflicts};
+
+    let store = TaskStore::new(state.db());
+    let all_tasks = store.list(None, None)?;
+
+    // Ids of tasks in a terminal status; readiness is evaluated against them.
+    let mut done_ids: std::collections::HashSet<i64> = Default::default();
+    for t in all_tasks.iter().filter(|t| t.status.is_terminal()) {
+        done_ids.insert(t.id.0);
+    }
+
+    let dep_graph = DependencyGraph::from_tasks(&all_tasks);
+    let has_cycle = dep_graph.find_cycle().is_some();
+    let topological_order = dep_graph.topological_sort().unwrap_or_default();
+
+    let mut nodes: Vec<DependencyNodeInfo> = Vec::new();
+    let mut edges: Vec<DependencyEdgeInfo> = Vec::new();
+    for t in &all_tasks {
+        nodes.push(DependencyNodeInfo {
+            id: t.id.0,
+            title: t.title.clone(),
+            status: t.status.label().to_string(),
+            is_ready: t.dependencies_satisfied(&done_ids),
+        });
+        edges.extend(t.depends_on.iter().map(|dep| DependencyEdgeInfo {
+            from: dep.prerequisite.0,
+            to: t.id.0,
+            kind: dep.kind.to_string(),
+        }));
+    }
+
+    // Conflict prediction only looks at tasks that are still pending.
+    let pending_tasks: Vec<_> = all_tasks
+        .iter()
+        .filter(|t| matches!(t.status, TaskStatus::Pending))
+        .cloned()
+        .collect();
+    let conflicts = predict_conflicts(&pending_tasks);
+    let mut predicted_conflicts: Vec<PredictedConflictInfo> = Vec::new();
+    for c in conflicts.iter() {
+        predicted_conflicts.push(PredictedConflictInfo {
+            task_a: c.task_a.0,
+            task_b: c.task_b.0,
+            path: c.path.display().to_string(),
+            severity: c.severity.to_string(),
+        });
+    }
+
+    Ok(Json(DependencyGraphResponse {
+        nodes,
+        edges,
+        topological_order,
+        has_cycle,
+        predicted_conflicts,
+    }))
+}
+
 // ─── Diff ────────────────────────────────────────────────────────────────
 
 /// GET /api/v1/tasks/{id}/diff
@@ -493,6 +844,7 @@ async fn list_traces(
         level: query.level,
         target_prefix: query.target,
         field_filter: None,
+        pipeline_only: false,
     };
 
     let events = reader.read_events(&filter)?;
@@ -507,11 +859,102 @@ async fn list_traces(
     })))
 }
 
+// ─── Traceability Records ────────────────────────────────────────────
+
+#[derive(Deserialize)]
+struct TraceRecordsQuery {
+    task_id: Option<i64>,
+    requirement_id: Option<String>,
+}
+
+/// GET /api/v1/traces/records — list traceability records, filtered by the
+/// optional `task_id` / `requirement_id` query parameters. The JSON body
+/// carries the records together with their number as `count`.
+async fn list_trace_records(
+    State(state): State<Arc<ApiState>>,
+    Query(query): Query<TraceRecordsQuery>,
+) -> Result<Json<serde_json::Value>, AppError> {
+    let TraceRecordsQuery { task_id, requirement_id } = query;
+    let store = thrum_db::trace_store::TraceStore::new(state.db());
+    let records = store.list_all(task_id, requirement_id.as_deref())?;
+    let count = records.len();
+
+    Ok(Json(serde_json::json!({ "count": count, "records": records })))
+}
+
+/// GET /api/v1/traces/matrix — build a TraceabilityMatrix from the records
+/// matching the optional `task_id` / `requirement_id` query parameters.
+async fn trace_matrix(
+    State(state): State<Arc<ApiState>>,
+    Query(query): Query<TraceRecordsQuery>,
+) -> Result<Json<thrum_core::traceability::TraceabilityMatrix>, AppError> {
+    use thrum_core::traceability::TraceabilityMatrix;
+    let store = thrum_db::trace_store::TraceStore::new(state.db());
+    let records = store.list_all(query.task_id, query.requirement_id.as_deref())?;
+    Ok(Json(TraceabilityMatrix::from_records(&records)))
+}
+
+// ─── Sync ─────────────────────────────────────────────────────────────
+
+#[derive(Deserialize)]
+struct SyncRequest {
+    repo: String,
+}
+
+#[derive(Serialize)]
+struct SyncResponse {
+    success: bool,
+    message: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    branches_rebased: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    branches_conflicted: Option<u32>,
+}
+
+/// Runs a manual sync for the requested repo. An unknown repo is a not-found
+/// error; a failed sync is reported in the body (`success: false`), not as an error.
+async fn trigger_sync(
+    State(state): State<Arc<ApiState>>,
+    Json(req): Json<SyncRequest>,
+) -> Result<Json<SyncResponse>, AppError> {
+    let repo_name = RepoName::new(&req.repo);
+    let repos_config = state.repos_config()?;
+    let repo_config = repos_config
+        .get(&repo_name)
+        .ok_or_else(|| AppError::not_found(format!("repo '{}' not found in config", req.repo)))?;
+    let task_store = TaskStore::new(state.db());
+    let outcome = thrum_runner::sync::trigger_manual_sync(
+        &repo_config.path,
+        &repo_name,
+        &task_store,
+        &state.event_bus,
+    );
+    let response = match outcome {
+        Err(e) => SyncResponse {
+            success: false,
+            message: format!("Sync failed: {e}"),
+            branches_rebased: None,
+            branches_conflicted: None,
+        },
+        Ok(record) => SyncResponse {
+            success: true,
+            message: format!(
+                "Sync completed: {} branches rebased, {} conflicts",
+                record.branches_rebased, record.branches_conflicted
+            ),
+            branches_rebased: Some(record.branches_rebased),
+            branches_conflicted: Some(record.branches_conflicted),
+        },
+    };
+    Ok(Json(response))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use axum::body::Body;
     use axum::http::Request;
+    use thrum_core::task::{CheckpointSummary, GateLevel, GateReport};
     use tower::ServiceExt;
 
     fn test_state() -> (Arc<ApiState>, tempfile::TempDir) {
@@ -793,7 +1236,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn unified_dashboard_includes_sse() {
+    async fn unified_dashboard_includes_websocket_and_sse_fallback() {
         let (state, _dir) = test_state();
         let app = api_router(state);
 
@@ -812,9 +1255,14 @@ mod tests {
             .await
             .unwrap();
         let html = String::from_utf8(body.to_vec()).unwrap();
-        // Unified dashboard includes SSE wiring from the old live page
+        // Dashboard connects via WebSocket first
+        assert!(html.contains("WebSocket"));
+        assert!(html.contains("connectWebSocket"));
+        assert!(html.contains("/ws"));
+        // SSE fallback is still present
         assert!(html.contains("EventSource"));
         assert!(html.contains("/api/v1/events/stream"));
+        assert!(html.contains("connectSSE"));
         assert!(html.contains("agent-grid"));
         // Also still has HTMX polling partials
         assert!(html.contains("partials/budget"));
@@ -953,124 +1401,6 @@ mod tests {
         assert_eq!(body["status"], "ok");
     }
 
-    #[tokio::test]
-    async fn serve_with_shared_db_exposes_a2a_agent_card() {
-        // This exercises the exact ApiState::with_shared_db path that --serve uses
-        let dir = tempfile::tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let shared_db = Arc::new(thrum_db::open_db(&db_path).unwrap());
-        let event_bus = EventBus::new();
-        let state = Arc::new(ApiState::with_shared_db(
-            shared_db,
-            dir.path().join("traces"),
-            None,
-            event_bus,
-        ));
-
-        let (base_url, _handle) = start_serve(state).await;
-
-        let resp = reqwest::get(format!("{base_url}/.well-known/agent.json"))
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 200);
-
-        let card: serde_json::Value = resp.json().await.unwrap();
-        assert_eq!(card["name"], "Thrum");
-        assert_eq!(card["capabilities"]["streaming"], true);
-        // Agent card should advertise 3 skills: implement, review, status
-        assert_eq!(card["skills"].as_array().unwrap().len(), 3);
-    }
-
-    #[tokio::test]
-    async fn serve_a2a_roundtrip_via_tcp() {
-        // Full A2A roundtrip through real HTTP: SendMessage → GetTask
-        let dir = tempfile::tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let shared_db = Arc::new(thrum_db::open_db(&db_path).unwrap());
-        let event_bus = EventBus::new();
-        let state = Arc::new(ApiState::with_shared_db(
-            shared_db,
-            dir.path().join("traces"),
-            None,
-            event_bus,
-        ));
-
-        let (base_url, _handle) = start_serve(state).await;
-
-        let client = reqwest::Client::new();
-
-        // 1. Create a task via A2A SendMessage
-        let send_body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.SendMessage",
-            "params": {
-                "message": {
-                    "message_id": "test-m1",
-                    "role": "user",
-                    "parts": [{"type": "text", "text": "Verify --serve flag\nEnd-to-end test"}]
-                },
-                "metadata": {"repo": "test-repo"}
-            }
-        });
-
-        let resp = client
-            .post(format!("{base_url}/a2a"))
-            .json(&send_body)
-            .send()
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 200);
-
-        let rpc_resp: serde_json::Value = resp.json().await.unwrap();
-        assert!(rpc_resp["error"].is_null(), "expected no error: {rpc_resp}");
-        let task_id = rpc_resp["result"]["id"].as_str().unwrap().to_string();
-        assert!(task_id.starts_with("thrum-"));
-        assert_eq!(rpc_resp["result"]["status"]["state"], "submitted");
-        assert_eq!(rpc_resp["result"]["metadata"]["repo"], "test-repo");
-
-        // 2. Retrieve the same task via A2A GetTask
-        let get_body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 2,
-            "method": "a2a.GetTask",
-            "params": {"task_id": task_id}
-        });
-
-        let resp = client
-            .post(format!("{base_url}/a2a"))
-            .json(&get_body)
-            .send()
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 200);
-
-        let rpc_resp: serde_json::Value = resp.json().await.unwrap();
-        assert!(rpc_resp["error"].is_null());
-        assert_eq!(rpc_resp["result"]["id"], task_id);
-
-        // 3. Verify the task also shows up in ListTasks
-        let list_body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 3,
-            "method": "a2a.ListTasks",
-            "params": {}
-        });
-
-        let resp = client
-            .post(format!("{base_url}/a2a"))
-            .json(&list_body)
-            .send()
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 200);
-
-        let rpc_resp: serde_json::Value = resp.json().await.unwrap();
-        let tasks = rpc_resp["result"].as_array().unwrap();
-        assert_eq!(tasks.len(), 1);
-        assert_eq!(tasks[0]["id"], task_id);
-    }
-
     #[tokio::test]
     async fn serve_shared_event_bus_delivers_events() {
         // Verify that events emitted on a shared EventBus reach the API's SSE endpoint.
@@ -1121,61 +1451,6 @@ mod tests {
         ));
     }
 
-    #[tokio::test]
-    async fn serve_rest_and_a2a_share_state() {
-        // Verify that a task created via REST API is visible through A2A and vice versa.
-        // This confirms the shared Arc<Database> is working correctly under --serve.
-        let dir = tempfile::tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let shared_db = Arc::new(thrum_db::open_db(&db_path).unwrap());
-        let event_bus = EventBus::new();
-        let state = Arc::new(ApiState::with_shared_db(
-            shared_db,
-            dir.path().join("traces"),
-            None,
-            event_bus,
-        ));
-
-        let (base_url, _handle) = start_serve(state).await;
-        let client = reqwest::Client::new();
-
-        // Create via REST
-        let create_body = serde_json::json!({
-            "repo": "cross-check",
-            "title": "REST-created task",
-            "description": "Should be visible via A2A"
-        });
-        let resp = client
-            .post(format!("{base_url}/api/v1/tasks"))
-            .json(&create_body)
-            .send()
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 201);
-        let task: serde_json::Value = resp.json().await.unwrap();
-        let task_id = task["id"].as_i64().unwrap();
-
-        // Retrieve via A2A GetTask
-        let get_body = serde_json::json!({
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "a2a.GetTask",
-            "params": {"task_id": format!("thrum-{task_id}")}
-        });
-        let resp = client
-            .post(format!("{base_url}/a2a"))
-            .json(&get_body)
-            .send()
-            .await
-            .unwrap();
-        assert_eq!(resp.status(), 200);
-
-        let rpc_resp: serde_json::Value = resp.json().await.unwrap();
-        assert!(rpc_resp["error"].is_null());
-        assert_eq!(rpc_resp["result"]["id"], format!("thrum-{task_id}"));
-        assert_eq!(rpc_resp["result"]["metadata"]["repo"], "cross-check");
-    }
-
     #[tokio::test]
     async fn review_page_returns_404_for_missing_task() {
         let (state, _dir) = test_state();
@@ -1249,25 +1524,14 @@ mod tests {
                     gate1_report: GateReport {
                         level: GateLevel::Quality,
                         checks: vec![
-                            CheckResult {
-                                name: "cargo_fmt".into(),
-                                passed: true,
-                                stdout: String::new(),
-                                stderr: String::new(),
-                                exit_code: 0,
-                            },
-                            CheckResult {
-                                name: "cargo_test".into(),
-                                passed: true,
-                                stdout: "test result: ok".into(),
-                                stderr: String::new(),
-                                exit_code: 0,
-                            },
+                            CheckResult::simple("cargo_fmt", true, "", "", 0),
+                            CheckResult::simple("cargo_test", true, "test result: ok", "", 0),
                         ],
                         passed: true,
                         duration_secs: 12.5,
                     },
                     gate2_report: None,
+                    trust_assessment: None,
                 },
             };
             store.update(&task).unwrap();
@@ -1354,4 +1618,1040 @@ mod tests {
         let ct = response.headers().get("content-type").unwrap();
         assert_eq!(ct, "text/css; charset=utf-8");
     }
+
+    #[tokio::test]
+    async fn sync_endpoint_returns_error_for_unknown_repo() {
+        let (state, _dir) = test_state();
+        let app = api_router(state);
+
+        let body = serde_json::json!({ "repo": "nonexistent" });
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .method("POST")
+                    .uri("/api/v1/sync")
+                    .header("content-type", "application/json")
+                    .body(Body::from(serde_json::to_string(&body).unwrap()))
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        // Config not set, so it should fail
+        assert_ne!(response.status(), StatusCode::OK);
+    }
+
+    #[tokio::test]
+    async fn dashboard_includes_sync_section() {
+        let (state, _dir) = test_state();
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        assert!(html.contains("Remote Sync"));
+        assert!(html.contains("sync-controls"));
+    }
+
+    #[tokio::test]
+    async fn trace_records_endpoint_empty() {
+        let (state, _dir) = test_state();
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/traces/records")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert_eq!(json["count"], 0);
+    }
+
+    #[tokio::test]
+    async fn trace_records_endpoint_with_data() {
+        let (state, _dir) = test_state();
+
+        // Insert a trace record directly
+        {
+            use thrum_core::traceability::{TraceArtifact, TraceRecord};
+            let store = thrum_db::trace_store::TraceStore::new(state.db());
+            let record = TraceRecord {
+                id: 0,
+                task_id: 1,
+                requirement_id: "REQ-001".into(),
+                artifact: TraceArtifact::Requirement {
+                    title: "Test req".into(),
+                    description: "Test desc".into(),
+                },
+                created_at: chrono::Utc::now(),
+            };
+            store.insert(record).unwrap();
+        }
+
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/traces/records?task_id=1")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert_eq!(json["count"], 1);
+    }
+
+    #[tokio::test]
+    async fn trace_matrix_endpoint() {
+        let (state, _dir) = test_state();
+
+        // Insert some trace records
+        {
+            use thrum_core::traceability::{TraceArtifact, TraceRecord};
+            let store = thrum_db::trace_store::TraceStore::new(state.db());
+            store
+                .insert(TraceRecord {
+                    id: 0,
+                    task_id: 1,
+                    requirement_id: "REQ-001".into(),
+                    artifact: TraceArtifact::Test {
+                        gate_level: "Quality".into(),
+                        passed: true,
+                        report_json: "{}".into(),
+                    },
+                    created_at: chrono::Utc::now(),
+                })
+                .unwrap();
+        }
+
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/traces/matrix")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert_eq!(json["entries"].as_array().unwrap().len(), 1);
+        assert_eq!(json["entries"][0]["requirement_id"], "REQ-001");
+        assert_eq!(json["entries"][0]["test_status"], true);
+    }
+
+    #[tokio::test]
+    async fn dashboard_traceability_section() {
+        let (state, _dir) = test_state();
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        assert!(html.contains("Traceability"));
+        assert!(html.contains("partials/traceability"));
+    }
+
+    #[tokio::test]
+    async fn dashboard_traceability_partial_empty() {
+        let (state, _dir) = test_state();
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/partials/traceability")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        assert!(html.contains("No traceability records yet"));
+    }
+
+    #[tokio::test]
+    async fn dashboard_traceability_partial_with_records() {
+        let (state, _dir) = test_state();
+
+        // Insert a trace record
+        {
+            use thrum_core::traceability::{TraceArtifact, TraceRecord};
+            let store = thrum_db::trace_store::TraceStore::new(state.db());
+            store
+                .insert(TraceRecord {
+                    id: 0,
+                    task_id: 1,
+                    requirement_id: "REQ-001".into(),
+                    artifact: TraceArtifact::Requirement {
+                        title: "Test req".into(),
+                        description: "Test desc".into(),
+                    },
+                    created_at: chrono::Utc::now(),
+                })
+                .unwrap();
+            store
+                .insert(TraceRecord {
+                    id: 0,
+                    task_id: 1,
+                    requirement_id: "REQ-001".into(),
+                    artifact: TraceArtifact::Test {
+                        gate_level: "Quality".into(),
+                        passed: true,
+                        report_json: "{}".into(),
+                    },
+                    created_at: chrono::Utc::now(),
+                })
+                .unwrap();
+        }
+
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/partials/traceability")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        assert!(html.contains("V-Model Traceability Chain"));
+        assert!(html.contains("REQ-001"));
+        assert!(html.contains("vmodel-step"));
+    }
+
+    // ─── Spec API tests ─────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn spec_api_get_returns_none_for_new_task() {
+        let (state, _dir) = test_state();
+        // Create a task first
+        {
+            let db = state.db();
+            let store = TaskStore::new(db);
+            let task = Task::new(RepoName::new("test"), "Spec test".into(), "desc".into());
+            store.insert(task).unwrap();
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/tasks/1/spec")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let spec: Option<thrum_core::spec::Spec> = serde_json::from_slice(&body).unwrap();
+        assert!(spec.is_none());
+    }
+
+    #[tokio::test]
+    async fn spec_api_set_and_get_roundtrip() {
+        let (state, _dir) = test_state();
+        // Create a task; the requests below assume it is assigned id 1
+        {
+            let db = state.db();
+            let store = TaskStore::new(db);
+            let task = Task::new(RepoName::new("test"), "Spec test".into(), "desc".into());
+            store.insert(task).unwrap();
+        }
+
+        // Set a spec via POST; the JSON body wraps the spec under a top-level "spec" key
+        let spec_body = serde_json::json!({
+            "spec": {
+                "title": "API spec test",
+                "context": "Testing API endpoint",
+                "requirements": [{
+                    "id": "REQ-API-001",
+                    "description": "Must work via API",
+                    "rationale": "API integration",
+                    "priority": "P1"
+                }],
+                "design": {
+                    "approach": "Direct API call",
+                    "affected_files": ["src/api.rs"]
+                },
+                "acceptance_criteria": ["API works (TEST)"],
+                "proof_obligations": [],
+                "test_plan": []
+            }
+        });
+
+        let app = api_router(state.clone());
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .method("POST")
+                    .uri("/api/v1/tasks/1/spec")
+                    .header("content-type", "application/json")
+                    .body(Body::from(serde_json::to_string(&spec_body).unwrap()))
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+
+        // GET the spec back through a second router built from a clone of the same state
+        let app = api_router(state.clone());
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/tasks/1/spec")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let spec: Option<thrum_core::spec::Spec> = serde_json::from_slice(&body).unwrap();
+        assert!(spec.is_some());
+        let spec = spec.unwrap();
+        assert_eq!(spec.title, "API spec test");
+        assert_eq!(spec.requirements.len(), 1);
+        assert_eq!(spec.requirements[0].id, "REQ-API-001");
+
+        // Verify the task's requirement_id was set from the spec's only requirement id
+        let db = state.db();
+        let store = TaskStore::new(db);
+        let task = store.get(&TaskId(1)).unwrap().unwrap();
+        assert_eq!(task.requirement_id, Some("REQ-API-001".into()));
+    }
+
+    /// Fails on the first dashboard line that pairs `hx-trigger` with `every`.
+    #[test]
+    fn dashboard_has_no_polling_triggers() {
+        // An "every" on an hx-trigger line is treated as the polling pattern.
+        let markup = include_str!("../assets/dashboard.html");
+        let mut trigger_lines = markup.lines().filter(|l| l.contains("hx-trigger"));
+        if let Some(offender) = trigger_lines.find(|l| l.contains("every")) {
+            panic!("Found polling trigger in dashboard.html: {}", offender.trim());
+        }
+    }
+
+    #[test]
+    fn dashboard_uses_event_driven_triggers() {
+        let html = include_str!("../assets/dashboard.html");
+        // Whole-file substring checks: each section's custom refresh event name must appear
+        assert!(
+            html.contains("refreshBudget"),
+            "budget section should use refreshBudget trigger"
+        );
+        assert!(
+            html.contains("refreshStatus"),
+            "status section should use refreshStatus trigger"
+        );
+        assert!(
+            html.contains("refreshTasks"),
+            "task section should use refreshTasks trigger"
+        );
+        assert!(
+            html.contains("refreshMemory"),
+            "memory section should use refreshMemory trigger"
+        );
+        assert!(
+            html.contains("refreshTraceability"),
+            "traceability section should use refreshTraceability trigger"
+        );
+        assert!(
+            html.contains("refreshActivity"),
+            "activity section should use refreshActivity trigger"
+        );
+    }
+
+    #[test]
+    fn dashboard_still_has_initial_load() {
+        let html = include_str!("../assets/dashboard.html");
+        // Sections should still load on page load: count `hx-trigger="load` prefixes (>= 5)
+        let load_triggers = html.matches("hx-trigger=\"load").count();
+        assert!(
+            load_triggers >= 5,
+            "expected at least 5 initial load triggers, found {load_triggers}"
+        );
+    }
+
+    /// Static check that the form-protection helper names appear in the embedded dashboard.
+    #[test]
+    fn dashboard_has_form_protection() {
+        let page = include_str!("../assets/dashboard.html");
+        let required = [
+            ("sectionHasActiveForm", "dashboard should have form protection function"),
+            ("safeRefreshSection", "dashboard should have safe refresh function"),
+        ];
+        for (needle, why) in required {
+            assert!(page.contains(needle), "{why}");
+        }
+    }
+
+    /// Static check that the checkbox save/restore helper names appear in the dashboard.
+    #[test]
+    fn dashboard_has_checkbox_persistence() {
+        let page = include_str!("../assets/dashboard.html");
+        let required = [
+            ("saveCheckboxState", "dashboard should save checkbox state"),
+            ("restoreCheckboxState", "dashboard should restore checkbox state"),
+        ];
+        for (needle, why) in required {
+            assert!(page.contains(needle), "{why}");
+        }
+    }
+
+    #[test]
+    fn dashboard_action_banners_persist_until_dismissed() {
+        let html = include_str!("../assets/dashboard.html");
+        // Should have dismiss button functionality (substring checks on the embedded page)
+        assert!(
+            html.contains("addDismissButton"),
+            "dashboard should add dismiss buttons to action results"
+        );
+        assert!(
+            html.contains("action-dismiss"),
+            "dashboard should use action-dismiss class"
+        );
+        // showActionResult's text up to the first "\n    }" must NOT auto-clear via setTimeout
+        let show_fn_pos = html.find("function showActionResult").unwrap();
+        let show_fn_end = html[show_fn_pos..].find("\n    }").unwrap() + show_fn_pos;
+        let show_fn_body = &html[show_fn_pos..show_fn_end];
+        assert!(
+            !show_fn_body.contains("setTimeout"),
+            "showActionResult should not use setTimeout for auto-clearing"
+        );
+    }
+
+    /// Static check that the dashboard mentions the BudgetUpdated and MemoryUpdated event names.
+    #[test]
+    fn dashboard_handles_budget_and_memory_events() {
+        let page = include_str!("../assets/dashboard.html");
+        let required = [
+            ("BudgetUpdated", "dashboard should handle BudgetUpdated events"),
+            ("MemoryUpdated", "dashboard should handle MemoryUpdated events"),
+        ];
+        for (needle, why) in required {
+            assert!(page.contains(needle), "{why}");
+        }
+    }
+
+    #[test]
+    fn dashboard_has_debounced_task_refresh() {
+        let html = include_str!("../assets/dashboard.html"); // page text embedded at compile time
+        assert!(
+            html.contains("_taskRefreshTimer"), // presence only; debounce timing is not exercised
+            "dashboard should have debounced task refresh timer"
+        );
+    }
+
+    #[test]
+    fn dashboard_refreshes_on_agent_and_gate_completion() {
+        let html = include_str!("../assets/dashboard.html");
+        // Find the AgentFinished handler block and verify it triggers section refresh
+        let af_pos = html
+            .find("kind.AgentFinished")
+            .expect("should have AgentFinished handler");
+        // Window of up to 1000 chars; a fixed byte offset could land mid-codepoint and panic.
+        let af_block: String = html[af_pos..].chars().take(1000).collect();
+        assert!(
+            af_block.contains("refreshTasks") || af_block.contains("safeRefreshSection"),
+            "AgentFinished handler should trigger task refresh. Block:\n{af_block}"
+        );
+        // Find the GateFinished handler block and verify it triggers section refresh
+        let gf_pos = html
+            .find("kind.GateFinished")
+            .expect("should have GateFinished handler");
+        // Same char-counted window as above, for the same UTF-8 boundary reason.
+        let gf_block: String = html[gf_pos..].chars().take(1000).collect();
+        assert!(
+            gf_block.contains("refreshTasks") || gf_block.contains("safeRefreshSection"),
+            "GateFinished handler should trigger task refresh. Block:\n{gf_block}"
+        );
+    }
+
+    #[tokio::test]
+    async fn status_json_endpoint_returns_counts() {
+        let (state, _dir) = test_state(); // no tasks are inserted by this test
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/api/status")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert!(json.get("pending").is_some()); // key presence only; values are not checked
+        assert!(json.get("active").is_some());
+        assert!(json.get("merged").is_some());
+    }
+
+    #[tokio::test]
+    async fn budget_json_endpoint_returns_data() {
+        let (state, _dir) = test_state(); // state is used exactly as test_state() returns it
+        let app = api_router(state);
+
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/api/budget")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert!(json.get("spent").is_some()); // key presence only; values are not checked
+        assert!(json.get("ceiling").is_some());
+        assert!(json.get("remaining").is_some());
+    }
+
+    #[tokio::test]
+    async fn dashboard_shows_spec_section_when_spec_exists() {
+        let (state, _dir) = test_state();
+        // Create a task with a spec, in AwaitingApproval; the review URL below assumes id 1
+        {
+            let db = state.db();
+            let store = TaskStore::new(db);
+            let mut task = Task::new(RepoName::new("test"), "Spec task".into(), "desc".into());
+            task.spec = Some(thrum_core::spec::Spec {
+                title: "Dashboard spec visibility".into(),
+                context: "Testing dashboard rendering".into(),
+                requirements: vec![thrum_core::spec::SpecRequirement {
+                    id: "REQ-DASH-001".into(),
+                    description: "Visible on dashboard".into(),
+                    rationale: String::new(),
+                    priority: thrum_core::spec::Priority::P1,
+                    safety_relevance: None,
+                }],
+                design: thrum_core::spec::DesignSpec {
+                    approach: "Render in HTML".into(),
+                    affected_files: vec!["src/dashboard.rs".into()],
+                    ..Default::default()
+                },
+                acceptance_criteria: vec!["Spec visible (BROWSER)".into()],
+                proof_obligations: Vec::new(),
+                test_plan: Vec::new(),
+            });
+            task.status = TaskStatus::AwaitingApproval {
+                summary: CheckpointSummary {
+                    diff_summary: "test diff".into(),
+                    reviewer_output: "LGTM".into(),
+                    gate1_report: GateReport {
+                        level: GateLevel::Quality,
+                        checks: vec![],
+                        passed: true,
+                        duration_secs: 0.0,
+                    },
+                    gate2_report: None,
+                    trust_assessment: None,
+                },
+            };
+            store.insert(task).unwrap();
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/tasks/1/review")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        // Spec should be visible on the dashboard review page.
+        // The clipboard emoji is an HTML character reference (&#x1F4CB;), so match the text only.
+        assert!(
+            html.contains("Structured Spec"),
+            "review page should show 'Structured Spec' heading"
+        );
+        assert!(
+            html.contains("Dashboard spec visibility"),
+            "review page should show the spec title"
+        );
+    }
+
+    #[tokio::test]
+    async fn dashboard_shows_add_spec_form_when_no_spec() {
+        let (state, _dir) = test_state();
+        // Create a task without a spec, in AwaitingApproval; the review URL below assumes id 1
+        {
+            let db = state.db();
+            let store = TaskStore::new(db);
+            let mut task = Task::new(RepoName::new("test"), "No spec".into(), "desc".into());
+            task.status = TaskStatus::AwaitingApproval {
+                summary: CheckpointSummary {
+                    diff_summary: "diff".into(),
+                    reviewer_output: "ok".into(),
+                    gate1_report: GateReport {
+                        level: GateLevel::Quality,
+                        checks: vec![],
+                        passed: true,
+                        duration_secs: 0.0,
+                    },
+                    gate2_report: None,
+                    trust_assessment: None,
+                },
+            };
+            store.insert(task).unwrap();
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/tasks/1/review")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        // Should show the "Add Structured Spec" form, including its spec_toml field
+        assert!(
+            html.contains("Add Structured Spec"),
+            "review page should show 'Add Structured Spec' prompt"
+        );
+        assert!(
+            html.contains("spec_toml"),
+            "review page should have spec_toml textarea"
+        );
+    }
+
+    #[tokio::test]
+    async fn dashboard_update_spec_action() {
+        let (state, _dir) = test_state();
+        // Create a task; the POST below assumes it is assigned id 1
+        {
+            let db = state.db();
+            let store = TaskStore::new(db);
+            let task = Task::new(RepoName::new("test"), "Update spec".into(), "desc".into());
+            store.insert(task).unwrap();
+        }
+
+        let spec_toml = "title = \"Updated spec\"\ncontext = \"Via dashboard\"\n\n[design]\napproach = \"Direct edit\"\n";
+
+        // Manually percent-encode the TOML for form submission (unreserved kept, ' ' -> '+')
+        let encoded: String = spec_toml
+            .bytes()
+            .map(|b| match b {
+                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
+                    format!("{}", b as char)
+                }
+                b' ' => "+".to_string(),
+                _ => format!("%{:02X}", b),
+            })
+            .collect();
+
+        let app = api_router(state.clone());
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .method("POST")
+                    .uri("/dashboard/tasks/1/spec")
+                    .header("content-type", "application/x-www-form-urlencoded")
+                    .body(Body::from(format!("spec_toml={}", encoded)))
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+        assert!(html.contains("Spec updated"));
+
+        // Verify spec was stored by reading the task back from the store directly
+        let db = state.db();
+        let store = TaskStore::new(db);
+        let task = store.get(&TaskId(1)).unwrap().unwrap();
+        assert!(task.spec.is_some());
+        assert_eq!(task.spec.as_ref().unwrap().title, "Updated spec");
+    }
+
+    // ─── Gate failure report tests ──────────────────────────────────────
+
+    #[tokio::test]
+    async fn api_task_response_includes_gate_report_for_failed_task() {
+        let (state, _dir) = test_state();
+
+        // Insert a task, then set it to Gate1Failed with one passing and one failing check
+        {
+            let store = TaskStore::new(state.db());
+            let mut task = Task::new(
+                RepoName::new("loom"),
+                "Gate fail test".into(),
+                "desc".into(),
+            );
+            task = store.insert(task).unwrap();
+            task.status = TaskStatus::Gate1Failed {
+                report: thrum_core::task::GateReport {
+                    level: thrum_core::task::GateLevel::Quality,
+                    checks: vec![
+                        thrum_core::task::CheckResult {
+                            name: "cargo_fmt".into(),
+                            passed: true,
+                            stdout: String::new(),
+                            stderr: String::new(),
+                            exit_code: 0,
+                            duration_secs: 0.0,
+                            findings: Vec::new(),
+                        },
+                        thrum_core::task::CheckResult {
+                            name: "cargo_clippy".into(),
+                            passed: false,
+                            stdout: String::new(),
+                            stderr: "error: unused variable `x`".into(),
+                            exit_code: 1,
+                            duration_secs: 0.0,
+                            findings: Vec::new(),
+                        },
+                    ],
+                    passed: false,
+                    duration_secs: 3.5,
+                },
+            };
+            store.update(&task).unwrap();
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/tasks/1")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+
+        // gate_report should be present and carry both checks, passing and failing
+        assert!(json["gate_report"].is_object(), "gate_report should exist");
+        assert_eq!(json["gate_report"]["passed"], false);
+        assert_eq!(json["gate_report"]["level"], "Quality");
+        assert_eq!(json["gate_report"]["checks"].as_array().unwrap().len(), 2);
+
+        // failing_checks convenience field: expected to list only the failed check's name
+        let failing = json["failing_checks"].as_array().unwrap();
+        assert_eq!(failing.len(), 1);
+        assert_eq!(failing[0], "cargo_clippy");
+
+        // status should still be a string (kebab-case form of the variant name)
+        assert_eq!(json["status"], "gate1-failed");
+    }
+
+    #[tokio::test]
+    async fn api_task_response_omits_gate_report_for_pending_task() {
+        let (state, _dir) = test_state();
+
+        {
+            let store = TaskStore::new(state.db());
+            let task = Task::new(RepoName::new("loom"), "Pending task".into(), "desc".into());
+            store.insert(task).unwrap(); // status is left exactly as Task::new set it
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/tasks/1")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+
+        // gate_report should be absent (skip_serializing_if), i.e. the key itself is missing
+        assert!(json.get("gate_report").is_none());
+        // failing_checks should be absent (empty vec)
+        assert!(json.get("failing_checks").is_none());
+        // gate_history should be absent (empty vec)
+        assert!(json.get("gate_history").is_none());
+    }
+
+    #[tokio::test]
+    async fn dashboard_task_detail_shows_gate_failure_report() {
+        let (state, _dir) = test_state();
+
+        {
+            let store = TaskStore::new(state.db());
+            let mut task = Task::new(
+                RepoName::new("loom"),
+                "Gate detail test".into(),
+                "description".into(),
+            );
+            task = store.insert(task).unwrap();
+            task.status = TaskStatus::Gate1Failed {
+                report: thrum_core::task::GateReport {
+                    level: thrum_core::task::GateLevel::Quality,
+                    checks: vec![thrum_core::task::CheckResult {
+                        name: "cargo_test".into(),
+                        passed: false,
+                        stdout: "test output".into(),
+                        stderr: "test failed: assertion".into(),
+                        exit_code: 1,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    }],
+                    passed: false,
+                    duration_secs: 7.2,
+                },
+            };
+            store.update(&task).unwrap(); // persist the failed status set after insert
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/partials/task-detail/1")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+
+        // Should contain the gate failure section: header, failing check name, FAILED marker
+        assert!(
+            html.contains("Gate Failure Report"),
+            "should show gate failure header"
+        );
+        assert!(
+            html.contains("cargo_test"),
+            "should show failing check name"
+        );
+        assert!(html.contains("FAILED"), "should show FAILED status");
+    }
+
+    #[tokio::test]
+    async fn dashboard_task_row_tooltip_shows_failing_checks() {
+        let (state, _dir) = test_state();
+
+        {
+            let store = TaskStore::new(state.db());
+            let mut task = Task::new(RepoName::new("loom"), "Tooltip test".into(), "desc".into());
+            task = store.insert(task).unwrap();
+            task.status = TaskStatus::Gate1Failed {
+                report: thrum_core::task::GateReport {
+                    level: thrum_core::task::GateLevel::Quality,
+                    checks: vec![thrum_core::task::CheckResult {
+                        name: "cargo_clippy".into(),
+                        passed: false,
+                        stdout: String::new(),
+                        stderr: "error: lint".into(),
+                        exit_code: 1,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    }],
+                    passed: false,
+                    duration_secs: 2.0,
+                },
+            };
+            store.update(&task).unwrap(); // persist the failed status set after insert
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/dashboard/partials/tasks")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let html = String::from_utf8(body.to_vec()).unwrap();
+
+        // The tooltip should include the failing check name after a "Gate 1 failed: " prefix
+        assert!(
+            html.contains("Gate 1 failed: cargo_clippy"),
+            "task row tooltip should show failing check: {html}"
+        );
+    }
+
+    #[tokio::test]
+    async fn api_task_response_includes_gate_history() {
+        let (state, _dir) = test_state();
+
+        {
+            let store = TaskStore::new(state.db());
+            let mut task = Task::new(RepoName::new("loom"), "History test".into(), "desc".into());
+            task = store.insert(task).unwrap();
+            // Simulate a previous gate failure (cargo_fmt) stored in history
+            task.gate_history.push(thrum_core::task::GateReport {
+                level: thrum_core::task::GateLevel::Quality,
+                checks: vec![thrum_core::task::CheckResult {
+                    name: "cargo_fmt".into(),
+                    passed: false,
+                    stdout: String::new(),
+                    stderr: "formatting error".into(),
+                    exit_code: 1,
+                    duration_secs: 0.0,
+                    findings: Vec::new(),
+                }],
+                passed: false,
+                duration_secs: 1.0,
+            });
+            task.retry_count = 1;
+            // Currently in gate1-failed again, this time on cargo_clippy
+            task.status = TaskStatus::Gate1Failed {
+                report: thrum_core::task::GateReport {
+                    level: thrum_core::task::GateLevel::Quality,
+                    checks: vec![thrum_core::task::CheckResult {
+                        name: "cargo_clippy".into(),
+                        passed: false,
+                        stdout: String::new(),
+                        stderr: "clippy error".into(),
+                        exit_code: 1,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    }],
+                    passed: false,
+                    duration_secs: 2.0,
+                },
+            };
+            store.update(&task).unwrap();
+        }
+
+        let app = api_router(state);
+        let response = app
+            .oneshot(
+                Request::builder()
+                    .uri("/api/v1/tasks/1")
+                    .body(Body::empty())
+                    .unwrap(),
+            )
+            .await
+            .unwrap();
+
+        assert_eq!(response.status(), StatusCode::OK);
+        let body = axum::body::to_bytes(response.into_body(), usize::MAX)
+            .await
+            .unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+
+        // Current gate_report reflects the report held in the status, not the history entry
+        assert!(json["gate_report"].is_object());
+        assert_eq!(json["gate_report"]["checks"][0]["name"], "cargo_clippy");
+
+        // Historical gate reports are exposed separately under gate_history
+        let history = json["gate_history"].as_array().unwrap();
+        assert_eq!(history.len(), 1);
+        assert_eq!(history[0]["checks"][0]["name"], "cargo_fmt");
+    }
 }
diff --git a/crates/thrum-api/src/ws.rs b/crates/thrum-api/src/ws.rs
new file mode 100644
index 0000000..90060da
--- /dev/null
+++ b/crates/thrum-api/src/ws.rs
@@ -0,0 +1,326 @@
+//! WebSocket endpoint for bidirectional pipeline communication.
+//!
+//! Provides a WebSocket upgrade at `GET /ws` that:
+//! - **Server -> Client**: streams all `PipelineEvent`s from the `EventBus` as JSON
+//! - **Client -> Server**: accepts JSON command messages for chat injection, agent abort, etc.
+//!
+//! The existing SSE endpoint at `/api/v1/events/stream` is preserved for backwards
+//! compatibility. Dashboard JS connects via WebSocket first and falls back to SSE
+//! if WebSocket is unavailable.
+//!
+//! ## Wire protocol
+//!
+//! Server-to-client messages are JSON objects:
+//! ```json
+//! { "type": "event", "data": { "timestamp": "...", "kind": { ... } } }
+//! ```
+//!
+//! Client-to-server messages are JSON objects:
+//! ```json
+//! { "type": "command", "command": "<name>", "payload": { ... } }
+//! ```
+//!
+//! Currently recognised commands (any other name is answered with an `ack` echoing it):
+//! - `ping` — server responds with `{ "type": "pong" }`
+
+use axum::{
+    extract::{
+        State,
+        ws::{Message, WebSocket, WebSocketUpgrade},
+    },
+    response::IntoResponse,
+};
+use futures_util::{SinkExt, StreamExt};
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+use tokio_stream::wrappers::BroadcastStream;
+
+use crate::ApiState;
+
+/// Envelope for server-to-client WebSocket messages, serialized with a snake_case `type` tag.
+#[derive(Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+enum WsOutgoing {
+    /// A pipeline event forwarded from the event bus.
+    Event {
+        data: thrum_core::event::PipelineEvent,
+    },
+    /// Response to a `ping` command.
+    Pong,
+    /// Notification that this client fell behind the event bus and `skipped` events were dropped.
+    Lagged { skipped: u64 },
+    /// Error response sent when an incoming text frame is not valid command JSON.
+    Error { message: String },
+    /// Acknowledgement echoing the name of any command other than `ping`.
+    Ack { command: String },
+}
+
+/// Envelope for client-to-server WebSocket messages, parsed from each incoming text frame.
+#[derive(Deserialize)]
+struct WsIncoming {
+    /// The command name (e.g. "ping"); this is what the command router matches on.
+    command: String,
+    /// Optional command-specific payload; defaults to JSON `null` when the field is absent.
+    #[serde(default)]
+    #[allow(dead_code)]
+    payload: serde_json::Value,
+}
+
+/// `GET /ws` — complete the WebSocket handshake and hand the socket off.
+///
+/// Once upgraded, the connection is driven by `handle_socket`, which receives
+/// the shared `ApiState` so it can subscribe to the event bus and answer
+/// client commands.
+pub async fn ws_handler(
+    ws: WebSocketUpgrade,
+    State(state): State<Arc<ApiState>>,
+) -> impl IntoResponse {
+    ws.on_upgrade(move |upgraded| async move { handle_socket(upgraded, state).await })
+}
+
+/// Run the WebSocket connection: two concurrent tasks for send and receive.
+///
+/// Uses an internal `mpsc` channel so that command responses from the receive
+/// task flow back through the send task, which alone owns the `SplitSink`.
+async fn handle_socket(socket: WebSocket, state: Arc<ApiState>) {
+    let (mut sender, mut receiver) = socket.split();
+
+    // Channel for command responses: recv loop -> send loop
+    let (resp_tx, mut resp_rx) = tokio::sync::mpsc::channel::<WsOutgoing>(64);
+
+    // Subscribe to the event bus before spawning, so no event emitted after this point is missed
+    let rx = state.event_bus.subscribe();
+    let mut event_stream = BroadcastStream::new(rx);
+
+    // Send loop: multiplex EventBus events and command responses onto the socket
+    let mut send_task = tokio::spawn(async move {
+        loop {
+            let msg: Option<WsOutgoing> = tokio::select! {
+                Some(result) = event_stream.next() => {
+                    Some(match result {
+                        Ok(event) => WsOutgoing::Event { data: event },
+                        Err(tokio_stream::wrappers::errors::BroadcastStreamRecvError::Lagged(n)) => {
+                            tracing::debug!(skipped = n, "WebSocket client lagged, skipping events");
+                            WsOutgoing::Lagged { skipped: n }
+                        }
+                    })
+                }
+                Some(response) = resp_rx.recv() => {
+                    Some(response)
+                }
+                else => None,
+            };
+
+            let Some(outgoing) = msg else {
+                break;
+            };
+
+            let json = match serde_json::to_string(&outgoing) {
+                Ok(j) => j,
+                Err(e) => {
+                    tracing::warn!("failed to serialize WS message: {e}");
+                    continue;
+                }
+            };
+
+            if sender.send(Message::Text(json.into())).await.is_err() {
+                // Client disconnected
+                break;
+            }
+        }
+    });
+
+    // Receive loop: read commands from the client and send responses via the channel
+    let mut recv_task = tokio::spawn(async move {
+        while let Some(Ok(msg)) = receiver.next().await {
+            match msg {
+                Message::Text(text) => {
+                    let response = match serde_json::from_str::<WsIncoming>(&text) {
+                        Ok(cmd) => handle_command(cmd),
+                        Err(e) => WsOutgoing::Error {
+                            message: format!("invalid command JSON: {e}"),
+                        },
+                    };
+                    if resp_tx.send(response).await.is_err() {
+                        // Send loop has exited
+                        break;
+                    }
+                }
+                Message::Close(_) => break,
+                // Ignore binary/ping/pong frames — axum handles protocol-level pings
+                _ => {}
+            }
+        }
+    });
+
+    // Dropping a `JoinHandle` detaches its task, so abort the survivor when either side ends.
+    tokio::select! {
+        _ = &mut send_task => recv_task.abort(),
+        _ = &mut recv_task => send_task.abort(),
+    }
+}
+
+/// Map a client command to its reply: `ping` yields `Pong`, any other name an `Ack`.
+fn handle_command(cmd: WsIncoming) -> WsOutgoing {
+    let WsIncoming { command, .. } = cmd;
+    if command == "ping" {
+        return WsOutgoing::Pong;
+    }
+    // Names without a dedicated handler are echoed back rather than rejected.
+    WsOutgoing::Ack { command }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use axum::body::Body;
+    use axum::http::Request;
+    use thrum_core::event::{EventKind, LogLevel};
+    use tower::ServiceExt;
+
+    /// Fresh `ApiState` rooted in a temp dir; the dir handle is returned alongside it.
+    fn test_state() -> (Arc<ApiState>, tempfile::TempDir) {
+        let tmp = tempfile::tempdir().unwrap();
+        let api = ApiState::new(&tmp.path().join("test.redb"), tmp.path().join("traces"), None);
+        (Arc::new(api.unwrap()), tmp)
+    }
+
+    #[tokio::test]
+    async fn ws_upgrade_requires_upgrade_header() {
+        // An ordinary GET carries none of the WebSocket handshake headers, so the
+        // upgrade must be refused rather than answered with a success status.
+        let (state, _dir) = test_state();
+        let router = crate::api_router(state);
+        let plain_get = Request::builder().uri("/ws").body(Body::empty()).unwrap();
+
+        let response = router.oneshot(plain_get).await.unwrap();
+
+        // Which error status axum chooses is not pinned here — only that the
+        // request is not answered with 200.
+        assert_ne!(response.status(), 200);
+    }
+
+    #[tokio::test]
+    async fn ws_endpoint_available_on_real_server() {
+        // Verify the WebSocket endpoint is wired into the router by checking
+        // that a real TCP connection can reach it (upgrade requires a real connection).
+        let (state, _dir) = test_state();
+        let event_bus = state.event_bus.clone();
+
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+
+        let server = tokio::spawn(async move {
+            let app = crate::api_router(state);
+            axum::serve(listener, app).await.unwrap();
+        });
+
+        // Give the server time to start
+        tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+
+        // Connect with a WebSocket client
+        let url = format!("ws://127.0.0.1:{}/ws", addr.port());
+        let (mut ws, _resp) = tokio_tungstenite::connect_async(&url).await.unwrap();
+
+        // Send a ping command
+        use tokio_tungstenite::tungstenite;
+        ws.send(tungstenite::Message::Text(
+            serde_json::json!({"command": "ping"}).to_string().into(),
+        ))
+        .await
+        .unwrap();
+
+        // Emit the event only once the pong arrives: the pong shows the handler has
+        // subscribed to the bus, so the event cannot be broadcast with no receiver.
+        let mut got_pong = false;
+        let mut got_event = false;
+        for _ in 0..10 {
+            let timeout =
+                tokio::time::timeout(std::time::Duration::from_millis(500), ws.next()).await;
+            match timeout {
+                Ok(Some(Ok(tungstenite::Message::Text(text)))) => {
+                    let val: serde_json::Value = serde_json::from_str(&text).unwrap();
+                    match val["type"].as_str() {
+                        Some("pong") => {
+                            got_pong = true;
+                            event_bus.emit(EventKind::EngineLog {
+                                level: LogLevel::Info,
+                                message: "hello from ws test".into(),
+                            });
+                        }
+                        Some("event") => got_event = true,
+                        _ => {}
+                    }
+                    if got_pong && got_event {
+                        break;
+                    }
+                }
+                _ => break,
+            }
+        }
+
+        assert!(got_pong, "should have received pong response");
+        assert!(got_event, "should have received event via WebSocket");
+
+        server.abort();
+    }
+
+    #[test]
+    fn handle_ping_command() {
+        // `ping` must be answered with the `pong` envelope.
+        let ping = WsIncoming {
+            command: String::from("ping"),
+            payload: serde_json::Value::Null,
+        };
+        let reply = serde_json::to_value(handle_command(ping)).unwrap();
+        assert_eq!(reply["type"], "pong");
+    }
+
+    #[test]
+    fn handle_unknown_command_returns_ack() {
+        // Names without a dedicated handler are echoed back in an `ack`.
+        let unknown = WsIncoming {
+            command: String::from("future_feature"),
+            payload: serde_json::json!({"task_id": 42}),
+        };
+        let reply = serde_json::to_value(handle_command(unknown)).unwrap();
+        assert_eq!(reply["type"], "ack");
+        assert_eq!(reply["command"], "future_feature");
+    }
+
+    #[test]
+    fn ws_outgoing_event_serializes() {
+        let kind = EventKind::EngineLog {
+            level: LogLevel::Info,
+            message: "test".into(),
+        };
+        let data = thrum_core::event::PipelineEvent::new(kind);
+        let wire = serde_json::to_string(&WsOutgoing::Event { data }).unwrap();
+        assert!(wire.contains(r#""type":"event""#));
+        assert!(wire.contains(r#""data""#));
+    }
+
+    #[test]
+    fn ws_outgoing_lagged_serializes() {
+        // The checks are substring matches on the serialized text, not parsed JSON.
+        let wire = serde_json::to_string(&WsOutgoing::Lagged { skipped: 42 }).unwrap();
+        assert!(wire.contains(r#""type":"lagged""#));
+        assert!(wire.contains(r#""skipped":42"#));
+    }
+
+    #[test]
+    fn ws_incoming_deserializes_with_payload() {
+        let raw = r#"{"command":"abort","payload":{"task_id":5}}"#;
+        let WsIncoming { command, payload } = serde_json::from_str(raw).unwrap();
+        assert_eq!(command, "abort");
+        assert_eq!(payload["task_id"], 5);
+    }
+
+    #[test]
+    fn ws_incoming_deserializes_without_payload() {
+        let raw = r#"{"command":"ping"}"#;
+        let WsIncoming { command, payload } = serde_json::from_str(raw).unwrap();
+        assert_eq!(command, "ping");
+        assert!(payload.is_null());
+    }
+}
diff --git a/crates/thrum-cli/Cargo.toml b/crates/thrum-cli/Cargo.toml
index 0996d17..3207f39 100644
--- a/crates/thrum-cli/Cargo.toml
+++ b/crates/thrum-cli/Cargo.toml
@@ -24,8 +24,6 @@ tracing = { workspace = true }
 tracing-subscriber = { workspace = true }
 redb = { workspace = true }
 toml = { workspace = true }
-ratatui = { workspace = true }
-crossterm = { workspace = true }
 
 [build-dependencies]
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
diff --git a/crates/thrum-cli/build.rs b/crates/thrum-cli/build.rs
index c73b897..6ff8a8d 100644
--- a/crates/thrum-cli/build.rs
+++ b/crates/thrum-cli/build.rs
@@ -71,6 +71,12 @@ fn main() {
 fn cmd(program: &str, args: &[&str]) -> String {
     Command::new(program)
         .args(args)
+        // Strip git env vars that leak from pre-commit hooks.
+        // GIT_INDEX_FILE points to a temporary staging index during commits,
+        // which causes git commands here to read from the wrong index.
+        .env_remove("GIT_INDEX_FILE")
+        .env_remove("GIT_DIR")
+        .env_remove("GIT_WORK_TREE")
         .output()
         .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
         .unwrap_or_else(|_| "unknown".into())
diff --git a/crates/thrum-cli/src/main.rs b/crates/thrum-cli/src/main.rs
index b239ae5..a3d0782 100644
--- a/crates/thrum-cli/src/main.rs
+++ b/crates/thrum-cli/src/main.rs
@@ -1,5 +1,3 @@
-mod watch;
-
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::{Parser, Subcommand};
@@ -8,20 +6,15 @@ use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use thrum_core::budget::BudgetTracker;
-use thrum_core::consistency::check_consistency;
 use thrum_core::gate::{run_gate, run_integration_gate};
 use thrum_core::repo::ReposConfig;
 use thrum_core::spec::Spec;
-use thrum_core::sphinx_needs::{NeedsJson, trace_record_to_needs};
 use thrum_core::task::{GateLevel, RepoName, Task, TaskId, TaskStatus};
 use thrum_core::telemetry::{TelemetryConfig, TraceFilter, TraceReader, init_telemetry};
 use thrum_db::budget_store::BudgetStore;
 use thrum_db::gate_store::GateStore;
 use thrum_db::meta_store::MetaStore;
 use thrum_db::task_store::TaskStore;
-use thrum_db::trace_store::TraceStore;
-use thrum_runner::backend::{AiRequest, BackendRegistry};
-use thrum_runner::claude::{ClaudeCliBackend, load_agent_prompt};
 use thrum_runner::parallel::{EngineConfig, PipelineContext};
 use tokio_util::sync::CancellationToken;
 
@@ -114,22 +107,13 @@ enum Commands {
         #[command(subcommand)]
         action: TaskAction,
     },
-    /// Show dashboard: tasks, consistency, budget.
+    /// Show dashboard: tasks, budget.
     Status,
-    /// Run cross-repo consistency checker.
-    Check,
-    /// Export traceability data.
-    Trace {
-        #[command(subcommand)]
-        action: TraceAction,
-    },
     /// View locally stored OpenTelemetry traces.
     Traces {
         #[command(subcommand)]
         action: TracesAction,
     },
-    /// Show tool safety classification (TCL, ASIL, SOUP).
-    Safety,
     /// Build release artifacts.
     Release {
         /// Dry run (don't actually create release).
@@ -152,8 +136,6 @@ enum Commands {
         #[command(subcommand)]
         action: MemoryAction,
     },
-    /// Live TUI dashboard showing agent activity.
-    Watch,
 }
 
 #[derive(Subcommand)]
@@ -178,25 +160,6 @@ enum TracesAction {
     },
 }
 
-#[derive(Subcommand)]
-enum TraceAction {
-    /// Export traceability as sphinx-needs JSON (needs.json).
-    Export {
-        #[arg(long, default_value = "docs/needs.json")]
-        output: PathBuf,
-        #[arg(long, default_value = "0.1.0")]
-        version: String,
-    },
-    /// Generate RST traceability page for a tool.
-    Rst {
-        tool: String,
-        #[arg(long)]
-        output: Option<PathBuf>,
-    },
-    /// Show traceability gaps (requirements without tests/proofs).
-    Gaps,
-}
-
 #[derive(Subcommand)]
 enum MemoryAction {
     /// List memory entries, optionally filtered by repo or category.
@@ -284,6 +247,19 @@ enum TaskAction {
         #[arg(long)]
         output: Option<PathBuf>,
     },
+    /// View or set a task's structured specification (SDD).
+    ///
+    /// Without --set, prints the current spec as Markdown (or TOML with --toml).
+    /// With --set, loads a TOML spec file and stores it on the task.
+    Spec {
+        id: i64,
+        /// Path to a TOML spec file to set on the task.
+        #[arg(long)]
+        set: Option<PathBuf>,
+        /// Output as TOML instead of Markdown.
+        #[arg(long)]
+        toml: bool,
+    },
 }
 
 #[tokio::main]
@@ -354,17 +330,11 @@ async fn main() -> Result<()> {
             let db = open_db()?;
             cmd_memory(&db, action)
         }
-        Commands::Trace { action } => {
-            let db = open_db()?;
-            cmd_trace(&db, action)
-        }
         Commands::Traces { action } => cmd_traces(&cli.trace_dir, action),
-        Commands::Safety => cmd_safety(),
         Commands::Status => {
             let db = open_db()?;
-            cmd_status(&db, &cli.config)
+            cmd_status(&db)
         }
-        Commands::Check => cmd_check(&cli.config),
         Commands::Release { dry_run, tag } => {
             let db = open_db()?;
             let repos_config = ReposConfig::load(&cli.config)?;
@@ -382,64 +352,6 @@ async fn main() -> Result<()> {
             cmd_changelog();
             Ok(())
         }
-        Commands::Watch => {
-            let db = open_db()?;
-            let repos_config = ReposConfig::load(&cli.config)?;
-            let pipeline = PipelineConfig::load(&cli.pipeline)?;
-            let registry = build_registry(&pipeline)?;
-
-            let roles_config = if pipeline.roles.is_empty() {
-                thrum_core::role::RolesConfig::default()
-            } else {
-                thrum_core::role::RolesConfig {
-                    roles: pipeline.roles,
-                }
-            };
-
-            let budget_tracker = {
-                let budget_store = BudgetStore::new(&db);
-                match budget_store.load()? {
-                    Some(mut existing) => {
-                        existing.ceiling_usd = pipeline.budget.ceiling_usd;
-                        existing
-                    }
-                    None => BudgetTracker::new(pipeline.budget.ceiling_usd),
-                }
-            };
-            let budget = Arc::new(tokio::sync::Mutex::new(budget_tracker));
-
-            let shared_db = Arc::new(db);
-            let event_bus = thrum_runner::event_bus::EventBus::new();
-            let conflict_policy = thrum_core::coordination::ConflictPolicy::default();
-            let coordination = thrum_runner::coordination_hub::CoordinationHub::new(
-                event_bus.clone(),
-                conflict_policy,
-            );
-
-            let ctx = Arc::new(PipelineContext {
-                db: shared_db,
-                repos_config: Arc::new(repos_config),
-                agents_dir: cli.agents_dir.clone(),
-                registry: Arc::new(registry),
-                session_budget_usd: None,
-                budget,
-                roles: Some(Arc::new(roles_config)),
-                sandbox_config: pipeline.sandbox,
-                event_bus,
-                integration_steps: pipeline
-                    .gates
-                    .integration
-                    .as_ref()
-                    .map(|g| g.steps.clone())
-                    .unwrap_or_default(),
-                subsample: pipeline.subsample,
-                worktrees_dir: pipeline.engine.worktrees_dir,
-                coordination,
-                conflict_policy,
-            });
-
-            watch::run_watch_tui(ctx).await
-        }
     }
 }
 
@@ -587,36 +499,6 @@ impl PipelineConfig {
 
 // ─── Autonomous Loop ─────────────────────────────────────────────────────
 
-/// Build a backend registry from pipeline config.
-///
-/// If `[[backends]]` are configured, uses config-driven registration.
-/// Otherwise falls back to hardcoded defaults (Claude CLI + Anthropic API).
-fn build_registry(pipeline: &PipelineConfig) -> Result<BackendRegistry> {
-    let default_cwd = std::env::current_dir()?;
-
-    let registry = if !pipeline.backends.is_empty() {
-        // Config-driven: any coding agent can be plugged in via pipeline.toml
-        thrum_runner::backend::build_registry_from_config(&pipeline.backends, &default_cwd)?
-    } else {
-        // Fallback: hardcoded Claude + Anthropic API (backward compatible)
-        let mut registry = BackendRegistry::new();
-        registry.register(Box::new(ClaudeCliBackend::new(default_cwd)));
-        if let Ok(backend) =
-            thrum_runner::anthropic::AnthropicApiBackend::from_env("claude-sonnet-4-5-20250929")
-        {
-            registry.register(Box::new(backend));
-        }
-        registry
-    };
-
-    tracing::info!(
-        backends = ?registry.list().iter().map(|(n, c, m)| format!("{n} ({c:?}, {m})")).collect::<Vec<_>>(),
-        "initialized backend registry"
-    );
-
-    Ok(registry)
-}
-
 /// Check all managed repos for advancement beyond what Thrum last saw.
 ///
 /// Also checks if the Thrum binary itself has been upgraded since the last
@@ -735,7 +617,9 @@ async fn cmd_run_parallel(
     config_path: PathBuf,
 ) -> Result<()> {
     let pipeline = PipelineConfig::load(pipeline_config)?;
-    let registry = build_registry(&pipeline)?;
+
+    // Create the process tracker for graceful shutdown of agent subprocesses.
+    let process_tracker = thrum_runner::shutdown::ProcessTracker::new();
     let shared_db = Arc::new(thrum_db::open_db(db_path)?);
 
     // Check if any repos have advanced since last Thrum run
@@ -775,12 +659,33 @@ async fn cmd_run_parallel(
     let shutdown = CancellationToken::new();
     let shutdown_signal = shutdown.clone();
 
-    // Signal handler for graceful shutdown
+    // Signal handler for graceful shutdown: handles both SIGINT (Ctrl+C) and SIGTERM.
     tokio::spawn(async move {
-        if tokio::signal::ctrl_c().await.is_ok() {
+        let ctrl_c = tokio::signal::ctrl_c();
+
+        #[cfg(unix)]
+        {
+            use tokio::signal::unix::{SignalKind, signal};
+            let mut sigterm =
+                signal(SignalKind::terminate()).expect("failed to register SIGTERM handler");
+
+            tokio::select! {
+                _ = ctrl_c => {
+                    tracing::info!("received SIGINT (Ctrl+C), initiating graceful shutdown");
+                }
+                _ = sigterm.recv() => {
+                    tracing::info!("received SIGTERM, initiating graceful shutdown");
+                }
+            }
+        }
+
+        #[cfg(not(unix))]
+        {
+            let _ = ctrl_c.await;
             tracing::info!("received Ctrl+C, initiating graceful shutdown");
-            shutdown_signal.cancel();
         }
+
+        shutdown_signal.cancel();
     });
 
     // Spawn A2A/HTTP API server if --serve was passed.
@@ -815,7 +720,6 @@ async fn cmd_run_parallel(
         db: shared_db.clone(),
         repos_config: Arc::new(repos_config),
         agents_dir: agents_dir.to_path_buf(),
-        registry: Arc::new(registry),
         session_budget_usd: None,
         budget: budget.clone(),
         roles: Some(Arc::new(roles_config)),
@@ -831,6 +735,8 @@ async fn cmd_run_parallel(
         worktrees_dir: pipeline.engine.worktrees_dir,
         coordination,
         conflict_policy,
+        process_tracker: process_tracker.clone(),
+        repo_cooldowns: thrum_runner::parallel::RepoCooldownTracker::new(),
     });
 
     let config = EngineConfig {
@@ -884,7 +790,6 @@ async fn cmd_run(
     check_repos_advanced(db, repos_config);
 
     let pipeline = PipelineConfig::load(pipeline_config)?;
-    let registry = build_registry(&pipeline)?;
     let integration_steps = pipeline
         .gates
         .integration
@@ -931,7 +836,6 @@ async fn cmd_run(
                 &gate_store,
                 repos_config,
                 agents_dir,
-                &registry,
                 &event_bus,
                 &budget,
                 subsample.as_ref(),
@@ -971,6 +875,52 @@ async fn cmd_run(
             continue;
         }
 
+        // Phase B¾: Process AwaitingCI tasks (poll CI, handle pass/fail)
+        {
+            let all_tasks = task_store.list(None, None)?;
+            let mut handled_ci = false;
+            for ci_task in all_tasks {
+                if !ci_task.status.is_awaiting_ci() {
+                    continue;
+                }
+                if let Some(ref filter) = repo_filter
+                    && &ci_task.repo != filter
+                {
+                    continue;
+                }
+                let repo_config = repos_config.get(&ci_task.repo);
+                let ci_enabled = repo_config
+                    .and_then(|rc| rc.ci.as_ref())
+                    .is_some_and(|ci| ci.enabled);
+                if !ci_enabled {
+                    continue;
+                }
+                let repo_path = repo_config.map(|rc| rc.path.clone()).unwrap_or_default();
+                tracing::info!(task_id = %ci_task.id, "processing AwaitingCI task");
+                let result = thrum_runner::ci::run_ci_loop(
+                    &task_store,
+                    &event_bus,
+                    &repo_path,
+                    agents_dir,
+                    None,
+                    ci_task,
+                )
+                .await;
+                match result {
+                    Ok(()) => tracing::info!("CI loop completed"),
+                    Err(e) => tracing::error!("CI loop failed: {e:#}"),
+                }
+                handled_ci = true;
+                break; // Process one CI task per iteration
+            }
+            if handled_ci {
+                if once {
+                    break;
+                }
+                continue;
+            }
+        }
+
         // Phase B½: Resume tasks with checkpoints (if --resume flag is set)
         if resume {
             let checkpoint_store = thrum_db::checkpoint_store::CheckpointStore::new(db);
@@ -1007,7 +957,6 @@ async fn cmd_run(
                         &gate_store,
                         repos_config,
                         agents_dir,
-                        &registry,
                         None,
                         &event_bus,
                         &budget,
@@ -1043,14 +992,7 @@ async fn cmd_run(
             None => {
                 // Phase D: No pending tasks — invoke planner if queue empty
                 tracing::info!("no pending tasks, invoking planner");
-                let planned = invoke_planner(
-                    &task_store,
-                    repos_config,
-                    agents_dir,
-                    &registry,
-                    repo_filter.as_ref(),
-                )
-                .await;
+                let planned = invoke_planner(&task_store, agents_dir, repo_filter.as_ref()).await;
                 match planned {
                     Ok(count) if count > 0 => {
                         tracing::info!(count, "planner created new tasks");
@@ -1075,7 +1017,6 @@ async fn cmd_run(
             &gate_store,
             repos_config,
             agents_dir,
-            &registry,
             None,
             &event_bus,
             &budget,
@@ -1117,18 +1058,11 @@ async fn cmd_run(
 /// Invoke the planner agent to auto-generate tasks.
 async fn invoke_planner(
     task_store: &TaskStore<'_>,
-    repos_config: &ReposConfig,
     agents_dir: &Path,
-    registry: &BackendRegistry,
     repo_filter: Option<&RepoName>,
 ) -> Result<usize> {
-    let planner = registry
-        .chat()
-        .or_else(|| registry.agent())
-        .context("no backend available for planning")?;
-
     let planner_prompt_file = agents_dir.join("planner.md");
-    let system_prompt = load_agent_prompt(&planner_prompt_file, None)
+    let system_prompt = thrum_runner::claude_code::load_agent_prompt(&planner_prompt_file, None)
         .await
         .unwrap_or_default();
 
@@ -1151,29 +1085,33 @@ async fn invoke_planner(
         context.push('\n');
     }
 
-    // Run consistency check for context
-    let mut repo_paths: HashMap<RepoName, &std::path::Path> = HashMap::new();
-    for repo in &repos_config.repo {
-        repo_paths.insert(repo.name.clone(), &repo.path);
-    }
-    if let Ok(report) = check_consistency(&repo_paths)
-        && !report.issues.is_empty()
-    {
-        context.push_str("## Consistency Issues\n\n");
-        for issue in &report.issues {
-            context.push_str(&format!("- {issue}\n"));
-        }
-        context.push('\n');
-    }
-
     if let Some(filter) = repo_filter {
         context.push_str(&format!("\nFocus on repo: {filter}\n"));
     }
 
     context.push_str("\nGenerate a JSON array of tasks. Respond with ONLY the JSON array.\n");
 
-    let request = AiRequest::new(&context).with_system(system_prompt);
-    let result = planner.invoke(&request).await?;
+    let full_prompt = format!("{system_prompt}\n\n{context}");
+    let cwd = std::env::current_dir()?;
+    let event_bus = thrum_runner::event_bus::EventBus::new();
+
+    let agent_config = thrum_runner::claude_code::AgentConfig {
+        prompt: full_prompt,
+        cwd,
+        max_budget_usd: 1.0,
+        model: "claude-opus-4-6".into(),
+        resume_session_id: None,
+        agent: None,
+        worktree: false,
+        permission_mode: "auto".into(),
+        timeout_secs: 300,
+    };
+
+    let agent_id = thrum_core::agent::AgentId("planner".into());
+    let task_id = thrum_core::task::TaskId(0);
+    let result =
+        thrum_runner::claude_code::invoke_streaming(&agent_config, &event_bus, &agent_id, &task_id)
+            .await?;
 
     // Parse JSON array of tasks from planner output
     let tasks: Vec<PlannerTask> = match serde_json::from_str(&result.content) {
@@ -1205,7 +1143,24 @@ async fn invoke_planner(
         }
         let mut task = Task::new(repo_name, pt.title, pt.description);
         task.requirement_id = pt.requirement_id;
-        task.acceptance_criteria = pt.acceptance_criteria;
+
+        // If the planner produced a structured spec, store it and use its criteria
+        if let Some(spec) = pt.spec {
+            task.acceptance_criteria = spec.tagged_acceptance_criteria();
+            // If no explicit requirement_id, use the first spec requirement
+            if task.requirement_id.is_none() {
+                task.requirement_id = spec.requirements.first().map(|r| r.id.clone());
+            }
+            task.spec = Some(spec);
+        } else {
+            // Enrich criteria with verification tags if not already tagged
+            task.acceptance_criteria =
+                thrum_core::verification::enrich_criteria(&pt.acceptance_criteria);
+        }
+
+        // Pre-parse tagged criteria for storage
+        let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+        task.tagged_criteria = audit.tagged_criteria;
         task_store.insert(task)?;
         created += 1;
     }
@@ -1223,6 +1178,9 @@ struct PlannerTask {
     acceptance_criteria: Vec<String>,
     #[serde(default)]
     requirement_id: Option<String>,
+    /// Structured spec produced by planner agent (optional).
+    #[serde(default)]
+    spec: Option<Spec>,
 }
 
 // ─── Release Pipeline ───────────────────────────────────────────────────
@@ -1251,7 +1209,6 @@ async fn cmd_release(
         println!("\nWould generate artifacts:");
         println!("  - verification-report.json (gate results)");
         println!("  - test-report.json");
-        println!("  - traceability-matrix.csv");
         println!("  - checksums.sha256");
 
         // Show gate status for each repo
@@ -1334,22 +1291,7 @@ async fn cmd_release(
     std::fs::write(&report_path, serde_json::to_string_pretty(&verification)?)?;
     println!("\nWrote {}", report_path.display());
 
-    // Step 5: Traceability matrix export
-    let trace_store = TraceStore::new(db);
-    let task_store = TaskStore::new(db);
-    let mut needs_json = NeedsJson::new("Thrum", &tag);
-    for task in task_store.list(None, None)? {
-        for record in trace_store.get_for_task(task.id.0)? {
-            for need in trace_record_to_needs(&record) {
-                needs_json.add(need);
-            }
-        }
-    }
-    let trace_path = release_dir.join("traceability.json");
-    std::fs::write(&trace_path, needs_json.to_json()?)?;
-    println!("Wrote {}", trace_path.display());
-
-    // Step 6: Checksums
+    // Step 5: Checksums
     let mut checksums = String::new();
     for entry in std::fs::read_dir(&release_dir)? {
         let entry = entry?;
@@ -1421,6 +1363,7 @@ fn cmd_traces(trace_dir: &Path, action: TracesAction) -> Result<()> {
                 level,
                 target_prefix: target,
                 field_filter,
+                pipeline_only: false,
             };
 
             let events = reader.read_events(&trace_filter)?;
@@ -1488,10 +1431,16 @@ fn cmd_task(db: &redb::Database, action: TaskAction, trace_dir: &Path) -> Result
                 let content = std::fs::read_to_string(&spec_path)
                     .context(format!("failed to read spec: {}", spec_path.display()))?;
                 let parsed_spec = Spec::from_toml(&content)?;
-                task.acceptance_criteria = parsed_spec.acceptance_criteria.clone();
+                // Enrich spec criteria with verification tags
+                task.acceptance_criteria =
+                    thrum_core::verification::enrich_criteria(&parsed_spec.acceptance_criteria);
                 task.spec = Some(parsed_spec);
             }
 
+            // Pre-parse tagged criteria for storage
+            let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+            task.tagged_criteria = audit.tagged_criteria;
+
             let task = store.insert(task)?;
             println!("Created {}: {}", task.id, task.title);
         }
@@ -1536,12 +1485,14 @@ fn cmd_task(db: &redb::Database, action: TaskAction, trace_dir: &Path) -> Result
             store.update(&task)?;
             println!("Approved {}: {}", task.id, task.title);
         }
-        TaskAction::Reject { id, feedback } => {
+        TaskAction::Reject { id, feedback, .. } => {
             let mut task = store
                 .get(&TaskId(id))?
                 .context(format!("task {id} not found"))?;
 
-            task.status = TaskStatus::Rejected { feedback };
+            task.status = TaskStatus::Rejected {
+                feedback: feedback.clone(),
+            };
             task.updated_at = Utc::now();
             store.update(&task)?;
             println!("Rejected {}: {}", task.id, task.title);
@@ -1558,13 +1509,25 @@ fn cmd_task(db: &redb::Database, action: TaskAction, trace_dir: &Path) -> Result
                 .context(format!("task {id} not found"))?;
 
             task.status = match status.as_str() {
-                "pending" => TaskStatus::Pending,
+                "pending" => {
+                    task.retry_count = 0;
+                    TaskStatus::Pending
+                }
                 "merged" => TaskStatus::Merged {
                     commit_sha: "manually-set".into(),
                 },
                 "approved" => TaskStatus::Approved,
+                "awaiting-ci" => TaskStatus::AwaitingCI {
+                    pr_number: 0,
+                    pr_url: "manually-set".into(),
+                    branch: task.branch_name(),
+                    started_at: Utc::now(),
+                    ci_attempts: 0,
+                },
                 other => {
-                    anyhow::bail!("unsupported status '{other}'. Use: pending, approved, merged")
+                    anyhow::bail!(
+                        "unsupported status '{other}'. Use: pending, approved, merged, awaiting-ci"
+                    )
                 }
             };
             task.updated_at = Utc::now();
@@ -1602,6 +1565,58 @@ fn cmd_task(db: &redb::Database, action: TaskAction, trace_dir: &Path) -> Result
             }
         }
 
+        TaskAction::Spec { id, set, toml } => {
+            let task_id = TaskId(id);
+
+            if let Some(spec_path) = set {
+                // Set mode: load spec from TOML and store on task
+                let mut task = store
+                    .get(&task_id)?
+                    .context(format!("task {id} not found"))?;
+
+                let content = std::fs::read_to_string(&spec_path)
+                    .context(format!("failed to read spec: {}", spec_path.display()))?;
+                let parsed_spec = Spec::from_toml(&content)?;
+
+                // Update acceptance criteria from spec
+                task.acceptance_criteria = parsed_spec.tagged_acceptance_criteria();
+                let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+                task.tagged_criteria = audit.tagged_criteria;
+
+                // Use first spec requirement as requirement_id if not already set
+                if task.requirement_id.is_none() {
+                    task.requirement_id = parsed_spec.requirements.first().map(|r| r.id.clone());
+                }
+
+                task.spec = Some(parsed_spec);
+                task.updated_at = Utc::now();
+                store.update(&task)?;
+                println!("Spec set for TASK-{id:04}: {}", task.title);
+            } else {
+                // View mode: print spec
+                let task = store
+                    .get(&task_id)?
+                    .context(format!("task {id} not found"))?;
+
+                match &task.spec {
+                    Some(spec) => {
+                        if toml {
+                            match spec.to_toml() {
+                                Ok(toml_str) => println!("{toml_str}"),
+                                Err(e) => anyhow::bail!("failed to serialize spec: {e}"),
+                            }
+                        } else {
+                            println!("{}", spec.to_markdown());
+                        }
+                    }
+                    None => {
+                        println!("TASK-{id:04} has no structured spec.");
+                        println!("\nUse `thrum task spec {id} --set <spec.toml>` to attach one.");
+                    }
+                }
+            }
+        }
+
         TaskAction::Export { id, format, output } => {
             use thrum_core::session_export::ExportFormat;
 
@@ -1806,7 +1821,7 @@ fn cmd_memory(db: &redb::Database, action: MemoryAction) -> Result<()> {
     Ok(())
 }
 
-fn cmd_status(db: &redb::Database, config_path: &Path) -> Result<()> {
+fn cmd_status(db: &redb::Database) -> Result<()> {
     let store = TaskStore::new(db);
     let counts = store.status_counts()?;
 
@@ -1818,182 +1833,5 @@ fn cmd_status(db: &redb::Database, config_path: &Path) -> Result<()> {
     let total: usize = counts.values().sum();
     println!("  {:<20} {total}", "total");
 
-    if config_path.exists() {
-        println!("\nConsistency: run `thrum check` for full report");
-    }
-
-    Ok(())
-}
-
-fn cmd_check(config_path: &Path) -> Result<()> {
-    let repos_config = ReposConfig::load(config_path)?;
-    let mut repo_paths: HashMap<RepoName, &std::path::Path> = HashMap::new();
-    for repo in &repos_config.repo {
-        repo_paths.insert(repo.name.clone(), &repo.path);
-    }
-
-    let report = check_consistency(&repo_paths)?;
-
-    println!("=== Consistency Report ===\n");
-    println!("wasmparser versions:");
-    for (repo, ver) in &report.wasmparser_versions {
-        println!("  {repo}: {ver}");
-    }
-    println!("\nRust editions:");
-    for (repo, ed) in &report.rust_editions {
-        println!("  {repo}: {ed}");
-    }
-
-    if report.issues.is_empty() {
-        println!("\nNo issues found.");
-    } else {
-        println!("\nIssues ({}):", report.issues.len());
-        for issue in &report.issues {
-            println!("  - {issue}");
-        }
-    }
-
-    Ok(())
-}
-
-fn cmd_trace(db: &redb::Database, action: TraceAction) -> Result<()> {
-    let trace_store = TraceStore::new(db);
-
-    match action {
-        TraceAction::Export { output, version } => {
-            let mut needs_json = NeedsJson::new("Thrum", &version);
-            let task_store = TaskStore::new(db);
-            for task in task_store.list(None, None)? {
-                for record in trace_store.get_for_task(task.id.0)? {
-                    for need in trace_record_to_needs(&record) {
-                        needs_json.add(need);
-                    }
-                }
-            }
-            let json = needs_json.to_json()?;
-            if let Some(parent) = output.parent() {
-                std::fs::create_dir_all(parent)?;
-            }
-            std::fs::write(&output, &json)?;
-            println!(
-                "Exported {} needs to {}",
-                needs_json.needs.len(),
-                output.display()
-            );
-        }
-        TraceAction::Rst { tool, output } => {
-            let rst = thrum_core::sphinx_needs::generate_traceability_rst(&tool);
-            let out_path =
-                output.unwrap_or_else(|| PathBuf::from(format!("docs/traceability/{tool}.rst")));
-            if let Some(parent) = out_path.parent() {
-                std::fs::create_dir_all(parent)?;
-            }
-            std::fs::write(&out_path, &rst)?;
-            println!("Generated RST at {}", out_path.display());
-        }
-        TraceAction::Gaps => {
-            let task_store = TaskStore::new(db);
-            println!("=== Traceability Gaps ===\n");
-            let mut has_gaps = false;
-            for task in task_store.list(None, None)? {
-                if let Some(ref req_id) = task.requirement_id {
-                    let records = trace_store.get_for_task(task.id.0)?;
-                    let has_test = records.iter().any(|r| {
-                        matches!(
-                            r.artifact,
-                            thrum_core::traceability::TraceArtifact::Test { .. }
-                        )
-                    });
-                    let has_proof = records.iter().any(|r| {
-                        matches!(
-                            r.artifact,
-                            thrum_core::traceability::TraceArtifact::Proof { .. }
-                        )
-                    });
-                    let has_review = records.iter().any(|r| {
-                        matches!(
-                            r.artifact,
-                            thrum_core::traceability::TraceArtifact::Review { .. }
-                        )
-                    });
-
-                    let mut gaps = Vec::new();
-                    if !has_test {
-                        gaps.push("test");
-                    }
-                    if !has_proof {
-                        gaps.push("proof");
-                    }
-                    if !has_review {
-                        gaps.push("review");
-                    }
-
-                    if !gaps.is_empty() {
-                        has_gaps = true;
-                        println!("  {} ({}): missing {}", req_id, task.repo, gaps.join(", "));
-                    }
-                }
-            }
-            if !has_gaps {
-                println!("  No gaps found.");
-            }
-        }
-    }
-    Ok(())
-}
-
-fn cmd_safety() -> Result<()> {
-    use thrum_core::safety::*;
-
-    println!("=== Tool Safety Classification ===\n");
-    println!(
-        "{:<8} {:<5} {:<5} {:<6} {:<10} QUALIFICATION",
-        "TOOL", "TI", "TD", "TCL", "ASIL"
-    );
-    println!("{}", "-".repeat(70));
-
-    let tools = [
-        ("loom", ToolImpact::Ti2, ToolDetection::Td2, "ASIL B"),
-        ("synth", ToolImpact::Ti2, ToolDetection::Td3, "ASIL D"),
-        ("meld", ToolImpact::Ti2, ToolDetection::Td2, "QM"),
-    ];
-
-    for (name, ti, td, asil) in &tools {
-        let tcl = determine_tcl(*ti, *td);
-        let methods = qualification_methods(tcl);
-        let method_str = if methods.is_empty() {
-            "None required".to_string()
-        } else {
-            format!("{} methods", methods.len())
-        };
-        println!(
-            "{:<8} {:<5} {:<5} {:<6} {:<10} {}",
-            name,
-            format!("{ti:?}"),
-            format!("{td:?}"),
-            tcl,
-            asil,
-            method_str,
-        );
-    }
-
-    println!("\n=== Qualification Methods ===\n");
-    for (name, ti, td, _) in &tools {
-        let tcl = determine_tcl(*ti, *td);
-        let methods = qualification_methods(tcl);
-        if !methods.is_empty() {
-            println!("{name} ({tcl}):");
-            for m in methods {
-                println!("  - {m}");
-            }
-            println!();
-        }
-    }
-
-    println!("=== ASPICE Process Mapping ===\n");
-    for (stage, process) in pipeline_aspice_mapping() {
-        println!("  {stage:<40} → {process}");
-    }
-
     Ok(())
 }
diff --git a/crates/thrum-cli/src/watch.rs b/crates/thrum-cli/src/watch.rs
deleted file mode 100644
index da85447..0000000
--- a/crates/thrum-cli/src/watch.rs
+++ /dev/null
@@ -1,1096 +0,0 @@
-//! Live TUI dashboard for pipeline observability.
-//!
-//! Subscribes to the `EventBus` broadcast channel and renders a split-pane
-//! dashboard with per-agent panels, a scrollable output log, and a bottom
-//! status bar showing aggregate counts.
-
-use std::collections::HashMap;
-use std::io::{self, Stdout};
-use std::sync::Arc;
-use std::time::{Duration, Instant};
-
-use anyhow::Result;
-use crossterm::event::{self, Event, KeyCode, KeyEvent, KeyModifiers};
-use crossterm::terminal::{
-    EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode,
-};
-use crossterm::{ExecutableCommand, execute};
-use ratatui::Terminal;
-use ratatui::backend::CrosstermBackend;
-use ratatui::layout::{Constraint, Direction, Layout, Rect};
-use ratatui::style::{Color, Modifier, Style};
-use ratatui::text::{Line, Span};
-use ratatui::widgets::{Block, Borders, List, ListItem, Paragraph};
-use thrum_core::agent::AgentId;
-use thrum_core::event::{EventKind, PipelineEvent};
-use thrum_core::task::TaskId;
-use thrum_db::task_store::TaskStore;
-use thrum_runner::parallel::PipelineContext;
-
-/// Per-agent state tracked by the TUI.
-struct AgentPanel {
-    agent_id: AgentId,
-    task_id: TaskId,
-    task_title: String,
-    repo: String,
-    stage: String,
-    last_tool: String,
-    insertions: u32,
-    deletions: u32,
-    files_changed: u32,
-    log_lines: Vec<String>,
-    started_at: Instant,
-    finished: bool,
-    success: Option<bool>,
-}
-
-impl AgentPanel {
-    fn elapsed_display(&self) -> String {
-        let secs = self.started_at.elapsed().as_secs();
-        let mins = secs / 60;
-        let secs = secs % 60;
-        format!("{mins}m{secs:02}s")
-    }
-
-    fn diff_summary(&self) -> String {
-        format!(
-            "+{} -{} ~{}",
-            self.insertions, self.deletions, self.files_changed
-        )
-    }
-}
-
-/// Top-level TUI application state.
-struct WatchApp {
-    agents: HashMap<String, AgentPanel>,
-    /// Ordered list of agent keys for stable rendering.
-    agent_order: Vec<String>,
-    /// Index of the currently selected agent panel (for log scrolling).
-    selected: usize,
-    /// Scroll offset within the selected agent's log.
-    scroll_offset: usize,
-    /// Engine-level log messages.
-    engine_log: Vec<String>,
-    /// Cached task status counts for the bottom bar.
-    queue_pending: usize,
-    queue_active: usize,
-    queue_total: usize,
-}
-
-impl WatchApp {
-    fn new() -> Self {
-        Self {
-            agents: HashMap::new(),
-            agent_order: Vec::new(),
-            selected: 0,
-            scroll_offset: 0,
-            engine_log: Vec::new(),
-            queue_pending: 0,
-            queue_active: 0,
-            queue_total: 0,
-        }
-    }
-
-    /// Process a pipeline event and update internal state.
-    fn handle_event(&mut self, event: &PipelineEvent) {
-        match &event.kind {
-            EventKind::AgentStarted {
-                agent_id,
-                task_id,
-                repo,
-            } => {
-                let key = agent_id.0.clone();
-                let panel = AgentPanel {
-                    agent_id: agent_id.clone(),
-                    task_id: task_id.clone(),
-                    task_title: String::new(),
-                    repo: repo.to_string(),
-                    stage: "implementing".into(),
-                    last_tool: String::new(),
-                    insertions: 0,
-                    deletions: 0,
-                    files_changed: 0,
-                    log_lines: vec![format!(
-                        "[{}] Agent started",
-                        event.timestamp.format("%H:%M:%S")
-                    )],
-                    started_at: Instant::now(),
-                    finished: false,
-                    success: None,
-                };
-                self.agents.insert(key.clone(), panel);
-                if !self.agent_order.contains(&key) {
-                    self.agent_order.push(key);
-                }
-            }
-
-            EventKind::AgentOutput { agent_id, line, .. } => {
-                if let Some(panel) = self.agents.get_mut(&agent_id.0) {
-                    // Extract tool usage from Claude output lines
-                    if (line.contains("Tool:") || line.contains("tool_use"))
-                        && let Some(tool) = extract_tool_name(line)
-                    {
-                        panel.last_tool = tool;
-                    }
-                    panel.log_lines.push(line.clone());
-                    // Cap log buffer to prevent unbounded growth
-                    if panel.log_lines.len() > 5000 {
-                        panel.log_lines.drain(..1000);
-                    }
-                }
-            }
-
-            EventKind::AgentFinished {
-                agent_id,
-                success,
-                elapsed_secs,
-                ..
-            } => {
-                if let Some(panel) = self.agents.get_mut(&agent_id.0) {
-                    panel.finished = true;
-                    panel.success = Some(*success);
-                    let status = if *success { "OK" } else { "FAIL" };
-                    panel
-                        .log_lines
-                        .push(format!("Agent finished: {status} ({elapsed_secs:.1}s)"));
-                }
-            }
-
-            EventKind::TaskStateChange { task_id, to, .. } => {
-                // Update stage for any agent working on this task
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel.stage = to.clone();
-                    }
-                }
-            }
-
-            EventKind::GateStarted { task_id, level } => {
-                let stage = format!("{level}");
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel.stage = stage.clone();
-                        panel.log_lines.push(format!("Gate started: {level}"));
-                    }
-                }
-            }
-
-            EventKind::GateOutput {
-                task_id,
-                check_name,
-                line,
-                ..
-            } => {
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel.log_lines.push(format!("gate/{check_name}: {line}"));
-                    }
-                }
-            }
-
-            EventKind::GateCheckFinished {
-                task_id,
-                check_name,
-                passed,
-                ..
-            } => {
-                let status = if *passed { "PASS" } else { "FAIL" };
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel.log_lines.push(format!("gate/{check_name}: {status}"));
-                    }
-                }
-            }
-
-            EventKind::GateFinished {
-                task_id,
-                level,
-                passed,
-                duration_secs,
-            } => {
-                let status = if *passed { "PASS" } else { "FAIL" };
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel
-                            .log_lines
-                            .push(format!("{level}: {status} ({duration_secs:.1}s)"));
-                    }
-                }
-            }
-
-            EventKind::FileChanged {
-                agent_id,
-                path,
-                kind,
-                ..
-            } => {
-                if let Some(panel) = self.agents.get_mut(&agent_id.0) {
-                    let tag = match kind {
-                        thrum_core::event::FileChangeKind::Created => "created",
-                        thrum_core::event::FileChangeKind::Modified => "modified",
-                        thrum_core::event::FileChangeKind::Deleted => "deleted",
-                    };
-                    panel
-                        .log_lines
-                        .push(format!("file {tag}: {}", path.display()));
-                }
-            }
-
-            EventKind::DiffUpdate {
-                agent_id,
-                files_changed,
-                insertions,
-                deletions,
-                ..
-            } => {
-                if let Some(panel) = self.agents.get_mut(&agent_id.0) {
-                    panel.files_changed = *files_changed;
-                    panel.insertions = *insertions;
-                    panel.deletions = *deletions;
-                }
-            }
-
-            EventKind::EngineLog { level, message } => {
-                let tag = match level {
-                    thrum_core::event::LogLevel::Info => "INFO",
-                    thrum_core::event::LogLevel::Warn => "WARN",
-                    thrum_core::event::LogLevel::Error => "ERR ",
-                };
-                self.engine_log.push(format!("[{tag}] {message}"));
-                if self.engine_log.len() > 200 {
-                    self.engine_log.drain(..50);
-                }
-            }
-
-            EventKind::CheckpointSaved { task_id, phase, .. } => {
-                self.engine_log
-                    .push(format!("[CKPT] {task_id} checkpoint saved at {phase}"));
-            }
-
-            EventKind::SessionContinued {
-                task_id,
-                session_id,
-                ..
-            } => {
-                self.engine_log.push(format!(
-                    "[SESSION] {task_id} continuing session {session_id}"
-                ));
-            }
-
-            // -- Agent-to-agent coordination events --
-            EventKind::FileConflictDetected {
-                conflict, policy, ..
-            } => {
-                let policy_tag = match policy {
-                    thrum_core::coordination::ConflictPolicy::WarnAndContinue => "warn",
-                    thrum_core::coordination::ConflictPolicy::Serialize => "serialize",
-                };
-                self.engine_log.push(format!(
-                    "[CONFLICT/{policy_tag}] {} between {} and {} on {}",
-                    conflict.path.display(),
-                    conflict.first_agent,
-                    conflict.second_agent,
-                    conflict.repo,
-                ));
-                // Also notify both agent panels
-                for aid in [&conflict.first_agent, &conflict.second_agent] {
-                    if let Some(panel) = self.agents.get_mut(&aid.0) {
-                        panel
-                            .log_lines
-                            .push(format!("⚠ file conflict: {}", conflict.path.display()));
-                    }
-                }
-            }
-
-            EventKind::CrossAgentNotification {
-                source, message, ..
-            } => {
-                self.engine_log
-                    .push(format!("[NOTIFY] {source}: {message}"));
-            }
-
-            EventKind::SharedMemoryWrite {
-                agent_id,
-                key,
-                value,
-            } => {
-                self.engine_log
-                    .push(format!("[SHARED] {agent_id} set {key}={value}"));
-            }
-
-            EventKind::TaskConvergenceDetected {
-                task_id,
-                strategy,
-                repeated_count,
-            } => {
-                self.engine_log.push(format!(
-                    "[CONVERGENCE] {task_id}: strategy={strategy}, repeats={repeated_count}"
-                ));
-                // Also notify the agent panel working on this task
-                for panel in self.agents.values_mut() {
-                    if panel.task_id == *task_id {
-                        panel.log_lines.push(format!(
-                            "convergence detected: {strategy} (repeats={repeated_count})"
-                        ));
-                    }
-                }
-            }
-        }
-    }
-
-    /// Refresh task counts from the database.
-    fn refresh_queue_counts(&mut self, ctx: &PipelineContext) {
-        let store = TaskStore::new(&ctx.db);
-        if let Ok(counts) = store.status_counts() {
-            let pending = counts.get("pending").copied().unwrap_or(0);
-            let active = counts.get("claimed").copied().unwrap_or(0)
-                + counts.get("implementing").copied().unwrap_or(0)
-                + counts.get("reviewing").copied().unwrap_or(0)
-                + counts.get("integrating").copied().unwrap_or(0);
-            let total: usize = counts.values().sum();
-            self.queue_pending = pending;
-            self.queue_active = active;
-            self.queue_total = total;
-        }
-
-        // Also populate task titles from DB for any agents missing them
-        let task_store = TaskStore::new(&ctx.db);
-        for panel in self.agents.values_mut() {
-            if panel.task_title.is_empty()
-                && let Ok(Some(task)) = task_store.get(&panel.task_id)
-            {
-                panel.task_title = task.title;
-            }
-        }
-    }
-
-    fn active_agent_count(&self) -> usize {
-        self.agents.values().filter(|p| !p.finished).count()
-    }
-
-    fn idle_agent_count(&self) -> usize {
-        self.agents.values().filter(|p| p.finished).count()
-    }
-
-    fn scroll_up(&mut self) {
-        self.scroll_offset = self.scroll_offset.saturating_sub(3);
-    }
-
-    fn scroll_down(&mut self) {
-        if let Some(key) = self.agent_order.get(self.selected)
-            && let Some(panel) = self.agents.get(key)
-        {
-            let max = panel.log_lines.len().saturating_sub(1);
-            self.scroll_offset = (self.scroll_offset + 3).min(max);
-        }
-    }
-
-    fn select_prev(&mut self) {
-        if !self.agent_order.is_empty() {
-            if self.selected > 0 {
-                self.selected -= 1;
-            } else {
-                self.selected = self.agent_order.len() - 1;
-            }
-            self.scroll_offset = 0;
-        }
-    }
-
-    fn select_next(&mut self) {
-        if !self.agent_order.is_empty() {
-            self.selected = (self.selected + 1) % self.agent_order.len();
-            self.scroll_offset = 0;
-        }
-    }
-}
-
-/// Try to extract a tool name from an agent output line.
-fn extract_tool_name(line: &str) -> Option<String> {
-    // Common patterns in Claude CLI output
-    for prefix in &["Tool: ", "tool_use: ", "Using tool: "] {
-        if let Some(rest) = line.strip_prefix(prefix) {
-            return Some(rest.split_whitespace().next().unwrap_or(rest).to_string());
-        }
-    }
-    if line.contains("tool_use") {
-        // Try to find tool name in JSON-ish output
-        if let Some(start) = line.find("\"name\":") {
-            let rest = &line[start + 7..];
-            let rest = rest.trim().trim_start_matches('"');
-            if let Some(end) = rest.find('"') {
-                return Some(rest[..end].to_string());
-            }
-        }
-    }
-    None
-}
-
-/// Render the TUI to the terminal frame.
-fn render(frame: &mut ratatui::Frame, app: &WatchApp) {
-    let size = frame.area();
-
-    // Main layout: agent panels on top, bottom status bar
-    let main_chunks = Layout::default()
-        .direction(Direction::Vertical)
-        .constraints([Constraint::Min(8), Constraint::Length(3)])
-        .split(size);
-
-    render_agent_panels(frame, app, main_chunks[0]);
-    render_bottom_bar(frame, app, main_chunks[1]);
-}
-
-/// Render the split-pane agent panels area.
-fn render_agent_panels(frame: &mut ratatui::Frame, app: &WatchApp, area: Rect) {
-    if app.agent_order.is_empty() {
-        // No agents yet — show a waiting message
-        let msg = Paragraph::new("Waiting for agents to start...")
-            .style(Style::default().fg(Color::DarkGray))
-            .block(
-                Block::default()
-                    .borders(Borders::ALL)
-                    .title(" thrum watch "),
-            );
-        frame.render_widget(msg, area);
-        return;
-    }
-
-    // Split available space evenly among active agents (up to 4 visible)
-    let visible_agents: Vec<&str> = app.agent_order.iter().map(|s| s.as_str()).collect();
-    let num_panels = visible_agents.len().min(4);
-
-    if num_panels == 0 {
-        return;
-    }
-
-    // Arrange panels in a grid: 1 → 1×1, 2 → 2×1, 3-4 → 2×2
-    if num_panels <= 2 {
-        let constraints: Vec<Constraint> = (0..num_panels)
-            .map(|_| Constraint::Ratio(1, num_panels as u32))
-            .collect();
-        let chunks = Layout::default()
-            .direction(Direction::Horizontal)
-            .constraints(constraints)
-            .split(area);
-
-        for (i, key) in visible_agents.iter().take(num_panels).enumerate() {
-            if let Some(panel) = app.agents.get(*key) {
-                let is_selected = i == app.selected;
-                render_single_panel(frame, panel, chunks[i], is_selected, app.scroll_offset);
-            }
-        }
-    } else {
-        // 2×2 grid
-        let rows = Layout::default()
-            .direction(Direction::Vertical)
-            .constraints([Constraint::Ratio(1, 2), Constraint::Ratio(1, 2)])
-            .split(area);
-
-        let top_cols = Layout::default()
-            .direction(Direction::Horizontal)
-            .constraints([Constraint::Ratio(1, 2), Constraint::Ratio(1, 2)])
-            .split(rows[0]);
-
-        let bottom_cols = Layout::default()
-            .direction(Direction::Horizontal)
-            .constraints([Constraint::Ratio(1, 2), Constraint::Ratio(1, 2)])
-            .split(rows[1]);
-
-        let slots = [top_cols[0], top_cols[1], bottom_cols[0], bottom_cols[1]];
-        for (i, key) in visible_agents.iter().take(4).enumerate() {
-            if let Some(panel) = app.agents.get(*key) {
-                let is_selected = i == app.selected;
-                render_single_panel(frame, panel, slots[i], is_selected, app.scroll_offset);
-            }
-        }
-    }
-}
-
-/// Render a single agent panel with header info and scrollable log.
-fn render_single_panel(
-    frame: &mut ratatui::Frame,
-    panel: &AgentPanel,
-    area: Rect,
-    is_selected: bool,
-    scroll_offset: usize,
-) {
-    // Panel border style — highlight selected panel
-    let border_style = if is_selected {
-        Style::default().fg(Color::Cyan)
-    } else if panel.finished {
-        match panel.success {
-            Some(true) => Style::default().fg(Color::Green),
-            Some(false) => Style::default().fg(Color::Red),
-            None => Style::default().fg(Color::DarkGray),
-        }
-    } else {
-        Style::default().fg(Color::White)
-    };
-
-    let title_text = if panel.task_title.is_empty() {
-        format!(" {} | {} ", panel.task_id, panel.agent_id)
-    } else {
-        // Truncate title to keep panel header readable
-        let max_title_len = area.width.saturating_sub(20) as usize;
-        let truncated: String = panel.task_title.chars().take(max_title_len).collect();
-        format!(" {} {} ", panel.task_id, truncated)
-    };
-
-    let block = Block::default()
-        .borders(Borders::ALL)
-        .border_style(border_style)
-        .title(title_text);
-
-    let inner = block.inner(area);
-    frame.render_widget(block, area);
-
-    if inner.height < 3 {
-        return;
-    }
-
-    // Split inner area: 3-line header + scrollable log
-    let inner_chunks = Layout::default()
-        .direction(Direction::Vertical)
-        .constraints([Constraint::Length(3), Constraint::Min(1)])
-        .split(inner);
-
-    // Header: stage, last tool, diff stats, elapsed
-    let status_icon = if panel.finished {
-        match panel.success {
-            Some(true) => "✓",
-            Some(false) => "✗",
-            None => "?",
-        }
-    } else {
-        "▸"
-    };
-
-    let header_lines = vec![
-        Line::from(vec![
-            Span::styled(
-                format!("{status_icon} "),
-                Style::default().fg(if panel.finished {
-                    if panel.success.unwrap_or(false) {
-                        Color::Green
-                    } else {
-                        Color::Red
-                    }
-                } else {
-                    Color::Yellow
-                }),
-            ),
-            Span::styled(&panel.stage, Style::default().fg(Color::Cyan)),
-            Span::raw("  "),
-            Span::styled(
-                panel.elapsed_display(),
-                Style::default().fg(Color::DarkGray),
-            ),
-        ]),
-        Line::from(vec![
-            Span::styled("repo: ", Style::default().fg(Color::DarkGray)),
-            Span::raw(&panel.repo),
-            Span::raw("  "),
-            Span::styled("diff: ", Style::default().fg(Color::DarkGray)),
-            Span::styled(panel.diff_summary(), Style::default().fg(Color::Yellow)),
-        ]),
-        Line::from(vec![
-            Span::styled("tool: ", Style::default().fg(Color::DarkGray)),
-            Span::styled(
-                if panel.last_tool.is_empty() {
-                    "—"
-                } else {
-                    &panel.last_tool
-                },
-                Style::default().fg(Color::Magenta),
-            ),
-        ]),
-    ];
-
-    let header = Paragraph::new(header_lines);
-    frame.render_widget(header, inner_chunks[0]);
-
-    // Log area with scrolling (only for selected panel)
-    let log_height = inner_chunks[1].height as usize;
-    let total_lines = panel.log_lines.len();
-    let effective_offset = if is_selected {
-        scroll_offset.min(total_lines.saturating_sub(log_height))
-    } else {
-        // For non-selected panels, auto-scroll to bottom
-        total_lines.saturating_sub(log_height)
-    };
-
-    let visible_lines: Vec<ListItem> = panel
-        .log_lines
-        .iter()
-        .skip(effective_offset)
-        .take(log_height)
-        .map(|line| {
-            let style = if line.contains("FAIL") || line.contains("error") {
-                Style::default().fg(Color::Red)
-            } else if line.contains("PASS") || line.contains("OK") {
-                Style::default().fg(Color::Green)
-            } else if line.starts_with("gate/") {
-                Style::default().fg(Color::Blue)
-            } else {
-                Style::default().fg(Color::DarkGray)
-            };
-            ListItem::new(Line::from(Span::styled(line.as_str(), style)))
-        })
-        .collect();
-
-    let log_list = List::new(visible_lines);
-    frame.render_widget(log_list, inner_chunks[1]);
-}
-
-/// Render the bottom status bar.
-fn render_bottom_bar(frame: &mut ratatui::Frame, app: &WatchApp, area: Rect) {
-    let active = app.active_agent_count();
-    let idle = app.idle_agent_count();
-    let total = active + idle;
-
-    let status_line = Line::from(vec![
-        Span::styled(" Agents: ", Style::default().add_modifier(Modifier::BOLD)),
-        Span::styled(
-            format!("{active}"),
-            Style::default()
-                .fg(Color::Green)
-                .add_modifier(Modifier::BOLD),
-        ),
-        Span::styled(" active ", Style::default().fg(Color::DarkGray)),
-        Span::styled(format!("{idle}"), Style::default().fg(Color::Yellow)),
-        Span::styled(" idle ", Style::default().fg(Color::DarkGray)),
-        Span::styled(format!("{total}"), Style::default().fg(Color::White)),
-        Span::styled(" total", Style::default().fg(Color::DarkGray)),
-        Span::raw("  │  "),
-        Span::styled("Queue: ", Style::default().add_modifier(Modifier::BOLD)),
-        Span::styled(
-            format!("{}", app.queue_pending),
-            Style::default().fg(Color::Cyan),
-        ),
-        Span::styled(" pending ", Style::default().fg(Color::DarkGray)),
-        Span::styled(
-            format!("{}", app.queue_active),
-            Style::default().fg(Color::Green),
-        ),
-        Span::styled(" active ", Style::default().fg(Color::DarkGray)),
-        Span::styled(
-            format!("{}", app.queue_total),
-            Style::default().fg(Color::White),
-        ),
-        Span::styled(" total", Style::default().fg(Color::DarkGray)),
-        Span::raw("  │  "),
-        Span::styled(
-            "Ctrl+Q",
-            Style::default()
-                .fg(Color::Yellow)
-                .add_modifier(Modifier::BOLD),
-        ),
-        Span::styled(" quit  ", Style::default().fg(Color::DarkGray)),
-        Span::styled(
-            "←→",
-            Style::default()
-                .fg(Color::Yellow)
-                .add_modifier(Modifier::BOLD),
-        ),
-        Span::styled(" panel  ", Style::default().fg(Color::DarkGray)),
-        Span::styled(
-            "↑↓",
-            Style::default()
-                .fg(Color::Yellow)
-                .add_modifier(Modifier::BOLD),
-        ),
-        Span::styled(" scroll", Style::default().fg(Color::DarkGray)),
-    ]);
-
-    let bar = Paragraph::new(status_line).block(
-        Block::default()
-            .borders(Borders::ALL)
-            .border_style(Style::default().fg(Color::DarkGray)),
-    );
-    frame.render_widget(bar, area);
-}
-
-/// Set up the terminal for TUI rendering.
-fn setup_terminal() -> Result<Terminal<CrosstermBackend<Stdout>>> {
-    enable_raw_mode()?;
-    let mut stdout = io::stdout();
-    stdout.execute(EnterAlternateScreen)?;
-    let backend = CrosstermBackend::new(stdout);
-    let terminal = Terminal::new(backend)?;
-    Ok(terminal)
-}
-
-/// Restore the terminal to normal mode.
-fn restore_terminal(terminal: &mut Terminal<CrosstermBackend<Stdout>>) -> Result<()> {
-    disable_raw_mode()?;
-    execute!(terminal.backend_mut(), LeaveAlternateScreen)?;
-    terminal.show_cursor()?;
-    Ok(())
-}
-
-/// Main entry point: run the watch TUI connected to the pipeline event bus.
-pub async fn run_watch_tui(ctx: Arc<PipelineContext>) -> Result<()> {
-    let mut terminal = setup_terminal()?;
-    let mut app = WatchApp::new();
-    let mut rx = ctx.event_bus.subscribe();
-
-    // Refresh queue counts from DB on a timer
-    let mut last_db_refresh = Instant::now();
-    let db_refresh_interval = Duration::from_secs(2);
-
-    // Initial DB refresh
-    app.refresh_queue_counts(&ctx);
-
-    let tick_rate = Duration::from_millis(100);
-
-    loop {
-        // Draw
-        terminal.draw(|frame| render(frame, &app))?;
-
-        // Poll for crossterm input events with a short timeout
-        if event::poll(tick_rate)?
-            && let Event::Key(key) = event::read()?
-        {
-            match key {
-                // Ctrl+Q or 'q' to quit
-                KeyEvent {
-                    code: KeyCode::Char('q'),
-                    modifiers: KeyModifiers::CONTROL,
-                    ..
-                }
-                | KeyEvent {
-                    code: KeyCode::Char('q'),
-                    modifiers: KeyModifiers::NONE,
-                    ..
-                } => {
-                    break;
-                }
-                // Arrow keys for navigation
-                KeyEvent {
-                    code: KeyCode::Up, ..
-                } => app.scroll_up(),
-                KeyEvent {
-                    code: KeyCode::Down,
-                    ..
-                } => app.scroll_down(),
-                KeyEvent {
-                    code: KeyCode::Left,
-                    ..
-                } => app.select_prev(),
-                KeyEvent {
-                    code: KeyCode::Right,
-                    ..
-                } => app.select_next(),
-                // Page up/down for faster scrolling
-                KeyEvent {
-                    code: KeyCode::PageUp,
-                    ..
-                } => {
-                    for _ in 0..10 {
-                        app.scroll_up();
-                    }
-                }
-                KeyEvent {
-                    code: KeyCode::PageDown,
-                    ..
-                } => {
-                    for _ in 0..10 {
-                        app.scroll_down();
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        // Drain all pending events from the broadcast channel
-        loop {
-            match rx.try_recv() {
-                Ok(event) => app.handle_event(&event),
-                Err(tokio::sync::broadcast::error::TryRecvError::Empty) => break,
-                Err(tokio::sync::broadcast::error::TryRecvError::Lagged(n)) => {
-                    app.engine_log
-                        .push(format!("[WARN] Lagged: missed {n} events"));
-                    break;
-                }
-                Err(tokio::sync::broadcast::error::TryRecvError::Closed) => {
-                    break;
-                }
-            }
-        }
-
-        // Periodic DB refresh for queue counts
-        if last_db_refresh.elapsed() >= db_refresh_interval {
-            app.refresh_queue_counts(&ctx);
-            last_db_refresh = Instant::now();
-        }
-    }
-
-    restore_terminal(&mut terminal)?;
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use thrum_core::event::{EventKind, FileChangeKind, LogLevel, OutputStream, PipelineEvent};
-    use thrum_core::task::{GateLevel, RepoName};
-
-    fn make_event(kind: EventKind) -> PipelineEvent {
-        PipelineEvent::new(kind)
-    }
-
-    #[test]
-    fn agent_started_creates_panel() {
-        let mut app = WatchApp::new();
-        let event = make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1-loom-TASK-0001".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        });
-        app.handle_event(&event);
-
-        assert_eq!(app.agents.len(), 1);
-        assert_eq!(app.agent_order.len(), 1);
-        assert_eq!(app.agent_order[0], "agent-1-loom-TASK-0001");
-
-        let panel = app.agents.get("agent-1-loom-TASK-0001").unwrap();
-        assert_eq!(panel.repo, "loom");
-        assert_eq!(panel.stage, "implementing");
-        assert!(!panel.finished);
-    }
-
-    #[test]
-    fn agent_output_appends_to_log() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::AgentOutput {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            stream: OutputStream::Stdout,
-            line: "compiling...".into(),
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert!(panel.log_lines.iter().any(|l| l == "compiling..."));
-    }
-
-    #[test]
-    fn agent_finished_marks_done() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::AgentFinished {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            success: true,
-            elapsed_secs: 42.0,
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert!(panel.finished);
-        assert_eq!(panel.success, Some(true));
-    }
-
-    #[test]
-    fn diff_update_tracks_stats() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::DiffUpdate {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            files_changed: 5,
-            insertions: 100,
-            deletions: 20,
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert_eq!(panel.insertions, 100);
-        assert_eq!(panel.deletions, 20);
-        assert_eq!(panel.files_changed, 5);
-        assert_eq!(panel.diff_summary(), "+100 -20 ~5");
-    }
-
-    #[test]
-    fn task_state_change_updates_stage() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::TaskStateChange {
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-            from: "implementing".into(),
-            to: "reviewing".into(),
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert_eq!(panel.stage, "reviewing");
-    }
-
-    #[test]
-    fn engine_log_captured() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::EngineLog {
-            level: LogLevel::Warn,
-            message: "budget running low".into(),
-        }));
-
-        assert_eq!(app.engine_log.len(), 1);
-        assert!(app.engine_log[0].contains("budget running low"));
-    }
-
-    #[test]
-    fn navigation_wraps_around() {
-        let mut app = WatchApp::new();
-        // Add 3 agents
-        for i in 0..3 {
-            app.handle_event(&make_event(EventKind::AgentStarted {
-                agent_id: AgentId(format!("agent-{i}")),
-                task_id: TaskId(i),
-                repo: RepoName::new("loom"),
-            }));
-        }
-
-        assert_eq!(app.selected, 0);
-        app.select_next();
-        assert_eq!(app.selected, 1);
-        app.select_next();
-        assert_eq!(app.selected, 2);
-        app.select_next();
-        assert_eq!(app.selected, 0); // Wrapped
-
-        app.select_prev();
-        assert_eq!(app.selected, 2); // Wrapped backward
-    }
-
-    #[test]
-    fn active_idle_counts() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-2".into()),
-            task_id: TaskId(2),
-            repo: RepoName::new("synth"),
-        }));
-
-        assert_eq!(app.active_agent_count(), 2);
-        assert_eq!(app.idle_agent_count(), 0);
-
-        app.handle_event(&make_event(EventKind::AgentFinished {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            success: true,
-            elapsed_secs: 10.0,
-        }));
-
-        assert_eq!(app.active_agent_count(), 1);
-        assert_eq!(app.idle_agent_count(), 1);
-    }
-
-    #[test]
-    fn extract_tool_name_patterns() {
-        assert_eq!(extract_tool_name("Tool: Read"), Some("Read".into()));
-        assert_eq!(
-            extract_tool_name(r#"{"name":"Write","type":"tool_use"}"#),
-            Some("Write".into())
-        );
-        assert_eq!(extract_tool_name("regular output"), None);
-    }
-
-    #[test]
-    fn file_changed_appends_to_log() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::FileChanged {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            path: "src/main.rs".into(),
-            kind: FileChangeKind::Modified,
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert!(
-            panel
-                .log_lines
-                .iter()
-                .any(|l| l.contains("file modified: src/main.rs"))
-        );
-    }
-
-    #[test]
-    fn gate_events_update_panel() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-        app.handle_event(&make_event(EventKind::GateStarted {
-            task_id: TaskId(1),
-            level: GateLevel::Quality,
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert!(panel.stage.contains("Quality"));
-        assert!(panel.log_lines.iter().any(|l| l.contains("Gate started")));
-
-        app.handle_event(&make_event(EventKind::GateCheckFinished {
-            task_id: TaskId(1),
-            level: GateLevel::Quality,
-            check_name: "cargo_test".into(),
-            passed: true,
-        }));
-
-        let panel = app.agents.get("agent-1").unwrap();
-        assert!(
-            panel
-                .log_lines
-                .iter()
-                .any(|l| l.contains("gate/cargo_test: PASS"))
-        );
-    }
-
-    #[test]
-    fn log_buffer_capped() {
-        let mut app = WatchApp::new();
-        app.handle_event(&make_event(EventKind::AgentStarted {
-            agent_id: AgentId("agent-1".into()),
-            task_id: TaskId(1),
-            repo: RepoName::new("loom"),
-        }));
-
-        // Push 6000 lines
-        for i in 0..6000 {
-            app.handle_event(&make_event(EventKind::AgentOutput {
-                agent_id: AgentId("agent-1".into()),
-                task_id: TaskId(1),
-                stream: OutputStream::Stdout,
-                line: format!("line {i}"),
-            }));
-        }
-
-        let panel = app.agents.get("agent-1").unwrap();
-        // Should have been trimmed (5000 cap minus 1000 drain = ~4000-5000 range)
-        assert!(panel.log_lines.len() <= 5001);
-    }
-}
diff --git a/crates/thrum-core/Cargo.toml b/crates/thrum-core/Cargo.toml
index e946e19..9363c58 100644
--- a/crates/thrum-core/Cargo.toml
+++ b/crates/thrum-core/Cargo.toml
@@ -11,7 +11,6 @@ toml = { workspace = true }
 chrono = { workspace = true }
 anyhow = { workspace = true }
 thiserror = { workspace = true }
-cargo_toml = { workspace = true }
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true }
 opentelemetry = { workspace = true }
diff --git a/crates/thrum-core/src/a2a.rs b/crates/thrum-core/src/a2a.rs
deleted file mode 100644
index 61f9bbc..0000000
--- a/crates/thrum-core/src/a2a.rs
+++ /dev/null
@@ -1,659 +0,0 @@
-//! A2A (Agent-to-Agent) protocol types for Thrum.
-//!
-//! Implements the A2A protocol specification for inter-agent communication.
-//! These are pure data types with serde serialization — no async runtime
-//! dependency. HTTP handlers live in `thrum-api`.
-//!
-//! The A2A protocol complements MCP (agent-to-tool) with agent-to-agent
-//! coordination: agent discovery via Agent Cards, task submission via
-//! JSON-RPC 2.0, and real-time streaming via SSE.
-
-use crate::task::{Task, TaskId, TaskStatus};
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-// ─── JSON-RPC 2.0 Envelope ──────────────────────────────────────────────
-
-/// JSON-RPC 2.0 error codes.
-pub const PARSE_ERROR: i64 = -32700;
-pub const INVALID_REQUEST: i64 = -32600;
-pub const METHOD_NOT_FOUND: i64 = -32601;
-pub const INVALID_PARAMS: i64 = -32602;
-pub const INTERNAL_ERROR: i64 = -32603;
-/// A2A-specific: task not found.
-pub const TASK_NOT_FOUND: i64 = -32001;
-/// A2A-specific: task is in a terminal state and cannot be canceled.
-pub const TASK_NOT_CANCELABLE: i64 = -32002;
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct JsonRpcRequest {
-    pub jsonrpc: String,
-    pub id: serde_json::Value,
-    pub method: String,
-    #[serde(default)]
-    pub params: serde_json::Value,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct JsonRpcResponse {
-    pub jsonrpc: String,
-    pub id: serde_json::Value,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub result: Option<serde_json::Value>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<JsonRpcError>,
-}
-
-impl JsonRpcResponse {
-    pub fn success(id: serde_json::Value, result: serde_json::Value) -> Self {
-        Self {
-            jsonrpc: "2.0".into(),
-            id,
-            result: Some(result),
-            error: None,
-        }
-    }
-
-    pub fn error(id: serde_json::Value, code: i64, message: impl Into<String>) -> Self {
-        Self {
-            jsonrpc: "2.0".into(),
-            id,
-            result: None,
-            error: Some(JsonRpcError {
-                code,
-                message: message.into(),
-                data: None,
-            }),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct JsonRpcError {
-    pub code: i64,
-    pub message: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub data: Option<serde_json::Value>,
-}
-
-// ─── A2A Task Model ─────────────────────────────────────────────────────
-
-/// A2A task state, mapped from Thrum's 12-variant `TaskStatus`.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum A2aTaskState {
-    Submitted,
-    Working,
-    Completed,
-    Failed,
-    Canceled,
-    InputRequired,
-    Rejected,
-}
-
-impl A2aTaskState {
-    /// Map a Thrum `TaskStatus` to the A2A state model.
-    pub fn from_thrum_status(status: &TaskStatus) -> Self {
-        match status {
-            TaskStatus::Pending => A2aTaskState::Submitted,
-            TaskStatus::Claimed { .. } => A2aTaskState::Submitted,
-            TaskStatus::Implementing { .. } => A2aTaskState::Working,
-            TaskStatus::Gate1Failed { .. } => A2aTaskState::Failed,
-            TaskStatus::Reviewing { .. } => A2aTaskState::Working,
-            TaskStatus::Gate2Failed { .. } => A2aTaskState::Failed,
-            TaskStatus::AwaitingApproval { .. } => A2aTaskState::InputRequired,
-            TaskStatus::Approved => A2aTaskState::Working,
-            TaskStatus::Integrating => A2aTaskState::Working,
-            TaskStatus::Gate3Failed { .. } => A2aTaskState::Failed,
-            TaskStatus::Merged { .. } => A2aTaskState::Completed,
-            TaskStatus::Rejected { .. } => A2aTaskState::Rejected,
-        }
-    }
-}
-
-/// A2A message role.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum A2aRole {
-    User,
-    Agent,
-}
-
-/// A2A message part (tagged union).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(tag = "type", rename_all = "lowercase")]
-pub enum A2aPart {
-    Text {
-        text: String,
-    },
-    Data {
-        data: serde_json::Value,
-    },
-    File {
-        url: Option<String>,
-        raw: Option<String>,
-        mime_type: Option<String>,
-    },
-}
-
-/// A2A message exchanged between user and agent.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct A2aMessage {
-    pub message_id: String,
-    pub role: A2aRole,
-    pub parts: Vec<A2aPart>,
-    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
-    pub metadata: HashMap<String, serde_json::Value>,
-}
-
-/// A2A artifact produced by an agent.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct A2aArtifact {
-    pub artifact_id: String,
-    pub name: String,
-    pub parts: Vec<A2aPart>,
-    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
-    pub metadata: HashMap<String, serde_json::Value>,
-}
-
-/// A2A task status with timestamp.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct A2aTaskStatus {
-    pub state: A2aTaskState,
-    pub timestamp: DateTime<Utc>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub message: Option<String>,
-}
-
-/// Full A2A task representation.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct A2aTask {
-    pub id: String,
-    pub context_id: String,
-    pub status: A2aTaskStatus,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub artifacts: Vec<A2aArtifact>,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub history: Vec<A2aMessage>,
-    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
-    pub metadata: HashMap<String, serde_json::Value>,
-}
-
-impl A2aTask {
-    /// Convert a Thrum `Task` into an A2A task.
-    pub fn from_thrum_task(task: &Task) -> Self {
-        let state = A2aTaskState::from_thrum_status(&task.status);
-        let message = match &task.status {
-            TaskStatus::Implementing { branch, .. } => Some(format!("Working on branch {branch}")),
-            TaskStatus::Gate1Failed { report }
-            | TaskStatus::Gate2Failed { report }
-            | TaskStatus::Gate3Failed { report } => {
-                let failed_checks: Vec<&str> = report
-                    .checks
-                    .iter()
-                    .filter(|c| !c.passed)
-                    .map(|c| c.name.as_str())
-                    .collect();
-                Some(format!("Gate failed: {}", failed_checks.join(", ")))
-            }
-            TaskStatus::Reviewing { reviewer_output } => {
-                Some(format!("Under review: {}", truncate(reviewer_output, 100)))
-            }
-            TaskStatus::AwaitingApproval { .. } => Some("Awaiting human approval".into()),
-            TaskStatus::Merged { commit_sha } => Some(format!("Merged as {commit_sha}")),
-            TaskStatus::Rejected { feedback } => {
-                Some(format!("Rejected: {}", truncate(feedback, 100)))
-            }
-            _ => None,
-        };
-
-        let mut metadata = HashMap::new();
-        metadata.insert(
-            "repo".into(),
-            serde_json::Value::String(task.repo.to_string()),
-        );
-        metadata.insert(
-            "thrum_status".into(),
-            serde_json::Value::String(task.status.label().to_string()),
-        );
-        if let Some(ref req_id) = task.requirement_id {
-            metadata.insert(
-                "requirement_id".into(),
-                serde_json::Value::String(req_id.clone()),
-            );
-        }
-        if task.retry_count > 0 {
-            metadata.insert("retry_count".into(), serde_json::json!(task.retry_count));
-        }
-
-        // Build history from the task description as the initial user message
-        let initial_message = A2aMessage {
-            message_id: format!("msg-init-{}", task.id.0),
-            role: A2aRole::User,
-            parts: vec![A2aPart::Text {
-                text: format!("{}\n\n{}", task.title, task.description),
-            }],
-            metadata: HashMap::new(),
-        };
-
-        Self {
-            id: a2a_task_id(&task.id),
-            context_id: a2a_context_id(task),
-            status: A2aTaskStatus {
-                state,
-                timestamp: task.updated_at,
-                message,
-            },
-            artifacts: Vec::new(),
-            history: vec![initial_message],
-            metadata,
-        }
-    }
-}
-
-fn truncate(s: &str, max: usize) -> &str {
-    if s.len() <= max { s } else { &s[..max] }
-}
-
-// ─── Agent Card ─────────────────────────────────────────────────────────
-
-/// A2A Agent Card — describes agent capabilities for discovery.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AgentCard {
-    pub name: String,
-    pub description: String,
-    pub url: String,
-    pub version: String,
-    #[serde(rename = "supportedInterfaces")]
-    pub supported_interfaces: Vec<String>,
-    pub capabilities: AgentCapabilities,
-    #[serde(rename = "defaultInputModes")]
-    pub default_input_modes: Vec<String>,
-    #[serde(rename = "defaultOutputModes")]
-    pub default_output_modes: Vec<String>,
-    pub skills: Vec<AgentSkill>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provider: Option<String>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AgentCapabilities {
-    pub streaming: bool,
-    pub push_notifications: bool,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AgentSkill {
-    pub id: String,
-    pub name: String,
-    pub description: String,
-    #[serde(rename = "inputModes")]
-    pub input_modes: Vec<String>,
-    #[serde(rename = "outputModes")]
-    pub output_modes: Vec<String>,
-}
-
-impl AgentCard {
-    /// Build the default Thrum agent card with 3 skills.
-    pub fn thrum_default(base_url: &str) -> Self {
-        Self {
-            name: "Thrum".into(),
-            description: "Autonomous AI-driven development orchestrator. Manages tasks through a gated pipeline with quality, proof, and integration checks.".into(),
-            url: format!("{base_url}/a2a"),
-            version: env!("CARGO_PKG_VERSION").into(),
-            supported_interfaces: vec!["a2a".into()],
-            capabilities: AgentCapabilities {
-                streaming: true,
-                push_notifications: false,
-            },
-            default_input_modes: vec!["text/plain".into()],
-            default_output_modes: vec!["text/plain".into(), "application/json".into()],
-            skills: vec![
-                AgentSkill {
-                    id: "implement".into(),
-                    name: "Implement".into(),
-                    description: "Submit a development task for autonomous implementation through the quality-gated pipeline.".into(),
-                    input_modes: vec!["text/plain".into()],
-                    output_modes: vec!["text/plain".into(), "application/json".into()],
-                },
-                AgentSkill {
-                    id: "review".into(),
-                    name: "Review".into(),
-                    description: "Check the status and review output of a task in the pipeline.".into(),
-                    input_modes: vec!["text/plain".into()],
-                    output_modes: vec!["application/json".into()],
-                },
-                AgentSkill {
-                    id: "status".into(),
-                    name: "Status".into(),
-                    description: "List tasks and their current pipeline state.".into(),
-                    input_modes: vec!["text/plain".into()],
-                    output_modes: vec!["application/json".into()],
-                },
-            ],
-            provider: Some("Thrum Orchestrator".into()),
-        }
-    }
-}
-
-// ─── Method Params ──────────────────────────────────────────────────────
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SendMessageParams {
-    pub message: A2aMessage,
-    #[serde(default)]
-    pub context_id: Option<String>,
-    #[serde(default)]
-    pub task_id: Option<String>,
-    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
-    pub metadata: HashMap<String, serde_json::Value>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct GetTaskParams {
-    pub task_id: String,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ListTasksParams {
-    #[serde(default)]
-    pub context_id: Option<String>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CancelTaskParams {
-    pub task_id: String,
-}
-
-// ─── SSE Stream Events ──────────────────────────────────────────────────
-
-/// A2A SSE stream event types.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(tag = "type", rename_all = "lowercase")]
-pub enum A2aStreamEvent {
-    Task {
-        task: A2aTask,
-    },
-    Message {
-        message: A2aMessage,
-    },
-    StatusUpdate {
-        task_id: String,
-        status: A2aTaskStatus,
-    },
-    ArtifactUpdate {
-        task_id: String,
-        artifact: A2aArtifact,
-    },
-}
-
-// ─── ID Helpers ─────────────────────────────────────────────────────────
-
-static MESSAGE_COUNTER: AtomicU64 = AtomicU64::new(1);
-static ARTIFACT_COUNTER: AtomicU64 = AtomicU64::new(1);
-
-/// Convert a Thrum `TaskId` to an A2A task ID string.
-pub fn a2a_task_id(id: &TaskId) -> String {
-    format!("thrum-{}", id.0)
-}
-
-/// Parse a Thrum `TaskId` from an A2A task ID string.
-pub fn parse_thrum_task_id(a2a_id: &str) -> Option<TaskId> {
-    a2a_id
-        .strip_prefix("thrum-")?
-        .parse::<i64>()
-        .ok()
-        .map(TaskId)
-}
-
-/// Derive an A2A context ID from a Thrum task.
-pub fn a2a_context_id(task: &Task) -> String {
-    task.context_id
-        .clone()
-        .unwrap_or_else(|| format!("repo-{}", task.repo))
-}
-
-/// Generate a unique message ID.
-pub fn next_message_id() -> String {
-    format!("msg-{}", MESSAGE_COUNTER.fetch_add(1, Ordering::Relaxed))
-}
-
-/// Generate a unique artifact ID.
-pub fn next_artifact_id() -> String {
-    format!(
-        "artifact-{}",
-        ARTIFACT_COUNTER.fetch_add(1, Ordering::Relaxed)
-    )
-}
-
-// ─── Tests ──────────────────────────────────────────────────────────────
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::task::{CheckResult, GateLevel, GateReport, RepoName};
-
-    #[test]
-    fn state_mapping_exhaustive() {
-        // Verify every TaskStatus variant maps to a valid A2A state
-        let statuses: Vec<TaskStatus> = vec![
-            TaskStatus::Pending,
-            TaskStatus::Claimed {
-                agent_id: "a".into(),
-                claimed_at: Utc::now(),
-            },
-            TaskStatus::Implementing {
-                branch: "b".into(),
-                started_at: Utc::now(),
-            },
-            TaskStatus::Gate1Failed {
-                report: test_report(),
-            },
-            TaskStatus::Reviewing {
-                reviewer_output: "ok".into(),
-            },
-            TaskStatus::Gate2Failed {
-                report: test_report(),
-            },
-            TaskStatus::AwaitingApproval {
-                summary: crate::task::CheckpointSummary {
-                    diff_summary: String::new(),
-                    reviewer_output: String::new(),
-                    gate1_report: test_report(),
-                    gate2_report: None,
-                },
-            },
-            TaskStatus::Approved,
-            TaskStatus::Integrating,
-            TaskStatus::Gate3Failed {
-                report: test_report(),
-            },
-            TaskStatus::Merged {
-                commit_sha: "abc".into(),
-            },
-            TaskStatus::Rejected {
-                feedback: "nope".into(),
-            },
-        ];
-
-        let expected = [
-            A2aTaskState::Submitted,     // Pending
-            A2aTaskState::Submitted,     // Claimed
-            A2aTaskState::Working,       // Implementing
-            A2aTaskState::Failed,        // Gate1Failed
-            A2aTaskState::Working,       // Reviewing
-            A2aTaskState::Failed,        // Gate2Failed
-            A2aTaskState::InputRequired, // AwaitingApproval
-            A2aTaskState::Working,       // Approved
-            A2aTaskState::Working,       // Integrating
-            A2aTaskState::Failed,        // Gate3Failed
-            A2aTaskState::Completed,     // Merged
-            A2aTaskState::Rejected,      // Rejected
-        ];
-
-        for (status, expected_state) in statuses.iter().zip(expected.iter()) {
-            let actual = A2aTaskState::from_thrum_status(status);
-            assert_eq!(
-                actual,
-                *expected_state,
-                "status {:?} mapped to {:?}, expected {:?}",
-                status.label(),
-                actual,
-                expected_state,
-            );
-        }
-    }
-
-    #[test]
-    fn id_roundtrip() {
-        let id = TaskId(42);
-        let a2a = a2a_task_id(&id);
-        assert_eq!(a2a, "thrum-42");
-        let parsed = parse_thrum_task_id(&a2a).unwrap();
-        assert_eq!(parsed, id);
-    }
-
-    #[test]
-    fn id_parse_invalid() {
-        assert!(parse_thrum_task_id("invalid-42").is_none());
-        assert!(parse_thrum_task_id("thrum-abc").is_none());
-        assert!(parse_thrum_task_id("").is_none());
-    }
-
-    #[test]
-    fn context_id_from_task() {
-        let mut task = Task::new(RepoName::new("loom"), "Test".into(), "desc".into());
-        // No context_id set — falls back to repo name
-        assert_eq!(a2a_context_id(&task), "repo-loom");
-
-        // With explicit context_id
-        task.context_id = Some("sprint-42".into());
-        assert_eq!(a2a_context_id(&task), "sprint-42");
-    }
-
-    #[test]
-    fn agent_card_valid() {
-        let card = AgentCard::thrum_default("http://localhost:3000");
-        assert_eq!(card.name, "Thrum");
-        assert_eq!(card.url, "http://localhost:3000/a2a");
-        assert_eq!(card.skills.len(), 3);
-        assert!(card.capabilities.streaming);
-        assert!(!card.capabilities.push_notifications);
-
-        // Verify skills
-        let skill_ids: Vec<&str> = card.skills.iter().map(|s| s.id.as_str()).collect();
-        assert_eq!(skill_ids, vec!["implement", "review", "status"]);
-    }
-
-    #[test]
-    fn agent_card_serialization() {
-        let card = AgentCard::thrum_default("http://localhost:3000");
-        let json = serde_json::to_string(&card).unwrap();
-        let parsed: AgentCard = serde_json::from_str(&json).unwrap();
-        assert_eq!(parsed.name, "Thrum");
-        assert_eq!(parsed.skills.len(), 3);
-    }
-
-    #[test]
-    fn jsonrpc_response_success() {
-        let resp = JsonRpcResponse::success(serde_json::json!(1), serde_json::json!({"ok": true}));
-        assert_eq!(resp.jsonrpc, "2.0");
-        assert!(resp.result.is_some());
-        assert!(resp.error.is_none());
-
-        let json = serde_json::to_string(&resp).unwrap();
-        assert!(!json.contains("\"error\""));
-    }
-
-    #[test]
-    fn jsonrpc_response_error() {
-        let resp = JsonRpcResponse::error(serde_json::json!(1), METHOD_NOT_FOUND, "not found");
-        assert!(resp.result.is_none());
-        assert!(resp.error.is_some());
-        assert_eq!(resp.error.as_ref().unwrap().code, METHOD_NOT_FOUND);
-
-        let json = serde_json::to_string(&resp).unwrap();
-        assert!(!json.contains("\"result\""));
-    }
-
-    #[test]
-    fn a2a_task_from_thrum() {
-        let mut task = Task::new(
-            RepoName::new("loom"),
-            "Add feature X".into(),
-            "Details here".into(),
-        );
-        task.id = TaskId(7);
-        let a2a = A2aTask::from_thrum_task(&task);
-        assert_eq!(a2a.id, "thrum-7");
-        assert_eq!(a2a.context_id, "repo-loom");
-        assert_eq!(a2a.status.state, A2aTaskState::Submitted);
-        assert_eq!(a2a.history.len(), 1);
-        assert_eq!(a2a.history[0].role, A2aRole::User);
-        assert_eq!(a2a.metadata["repo"], "loom");
-    }
-
-    #[test]
-    fn message_ids_unique() {
-        let id1 = next_message_id();
-        let id2 = next_message_id();
-        assert_ne!(id1, id2);
-        assert!(id1.starts_with("msg-"));
-    }
-
-    #[test]
-    fn artifact_ids_unique() {
-        let id1 = next_artifact_id();
-        let id2 = next_artifact_id();
-        assert_ne!(id1, id2);
-        assert!(id1.starts_with("artifact-"));
-    }
-
-    #[test]
-    fn stream_event_serialization() {
-        let event = A2aStreamEvent::StatusUpdate {
-            task_id: "thrum-1".into(),
-            status: A2aTaskStatus {
-                state: A2aTaskState::Working,
-                timestamp: Utc::now(),
-                message: Some("implementing".into()),
-            },
-        };
-        let json = serde_json::to_string(&event).unwrap();
-        assert!(json.contains("\"type\":\"statusupdate\""));
-        let parsed: A2aStreamEvent = serde_json::from_str(&json).unwrap();
-        assert!(matches!(parsed, A2aStreamEvent::StatusUpdate { .. }));
-    }
-
-    #[test]
-    fn send_message_params_deserialize() {
-        let json = r#"{
-            "message": {
-                "message_id": "m1",
-                "role": "user",
-                "parts": [{"type": "text", "text": "implement X"}]
-            },
-            "context_id": "ctx-1"
-        }"#;
-        let params: SendMessageParams = serde_json::from_str(json).unwrap();
-        assert_eq!(params.context_id, Some("ctx-1".into()));
-        assert_eq!(params.message.parts.len(), 1);
-    }
-
-    fn test_report() -> GateReport {
-        GateReport {
-            level: GateLevel::Quality,
-            checks: vec![CheckResult {
-                name: "test".into(),
-                passed: false,
-                stdout: String::new(),
-                stderr: "fail".into(),
-                exit_code: 1,
-            }],
-            passed: false,
-            duration_secs: 1.0,
-        }
-    }
-}
diff --git a/crates/thrum-core/src/checkpoint.rs b/crates/thrum-core/src/checkpoint.rs
index 20963ac..971d707 100644
--- a/crates/thrum-core/src/checkpoint.rs
+++ b/crates/thrum-core/src/checkpoint.rs
@@ -166,13 +166,7 @@ mod tests {
     fn sample_gate_report(level: GateLevel) -> GateReport {
         GateReport {
             level,
-            checks: vec![CheckResult {
-                name: "test".into(),
-                passed: true,
-                stdout: "ok".into(),
-                stderr: String::new(),
-                exit_code: 0,
-            }],
+            checks: vec![CheckResult::simple("test", true, "ok", "", 0)],
             passed: true,
             duration_secs: 1.5,
         }
diff --git a/crates/thrum-core/src/ci.rs b/crates/thrum-core/src/ci.rs
new file mode 100644
index 0000000..e8469ac
--- /dev/null
+++ b/crates/thrum-core/src/ci.rs
@@ -0,0 +1,293 @@
+//! CI status types shared between core and runner.
+
+use serde::{Deserialize, Serialize};
+
+/// Status of a single CI check.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CICheck {
+    /// Name of the check (e.g. "build", "test", "lint").
+    pub name: String,
+    /// Raw status string, e.g. "pending", "success", "failure", "cancelled", "skipped".
+    pub status: String,
+    /// Optional URL to the check run details.
+    pub url: Option<String>,
+}
+
+/// Aggregated CI status for a PR, as computed by `CIPollResult::from_checks`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum CIStatus {
+    /// At least one check is still pending, queued, in progress or waiting.
+    Pending,
+    /// No check is pending or failed (skipped checks do not block a pass).
+    Pass,
+    /// At least one check failed, errored or was cancelled, and none is pending.
+    Fail,
+    /// No checks found (CI may not be configured).
+    NoChecks,
+}
+
+impl std::fmt::Display for CIStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Pending => "pending",
+            Self::Pass => "pass",
+            Self::Fail => "fail",
+            Self::NoChecks => "no-checks",
+        })
+    }
+}
+
+/// Result of polling CI status: the aggregate status plus the individual checks.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CIPollResult {
+    pub status: CIStatus,
+    pub checks: Vec<CICheck>,
+    /// Human-readable summary, e.g. "1 passed, 0 failed, 0 pending (total: 1)".
+    pub summary: String,
+}
+
+impl CIPollResult {
+    /// Build a poll result from a list of checks.
+    ///
+    /// Aggregates check statuses; the summary counts use the same buckets:
+    /// - Any pending/queued/in_progress/waiting → `Pending`
+    /// - Any failure/error/cancelled (and none pending) → `Fail`
+    /// - Anything else (success, skipped, ...) → `Pass`, counted as passed
+    /// - Empty checks → `NoChecks`
+    pub fn from_checks(checks: Vec<CICheck>) -> Self {
+        if checks.is_empty() {
+            return Self {
+                status: CIStatus::NoChecks,
+                checks,
+                summary: "No CI checks found".into(),
+            };
+        }
+
+        let pending = checks
+            .iter()
+            .filter(|c| {
+                matches!(
+                    c.status.as_str(),
+                    "pending" | "queued" | "in_progress" | "waiting"
+                )
+            })
+            .count();
+        let failed = checks
+            .iter()
+            .filter(|c| matches!(c.status.as_str(), "failure" | "error" | "cancelled"))
+            .count();
+        // Everything that is neither pending nor failed is what the aggregation
+        // below treats as passing; the two sets are disjoint, so no underflow.
+        let passed = checks.len() - pending - failed;
+
+        let status = if pending > 0 {
+            CIStatus::Pending
+        } else if failed > 0 {
+            CIStatus::Fail
+        } else {
+            CIStatus::Pass
+        };
+
+        let summary = format!(
+            "{passed} passed, {failed} failed, {pending} pending (total: {})",
+            checks.len()
+        );
+
+        Self {
+            status,
+            checks,
+            summary,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn ci_status_display_variants() {
+        assert_eq!(CIStatus::Pending.to_string(), "pending");
+        assert_eq!(CIStatus::Pass.to_string(), "pass");
+        assert_eq!(CIStatus::Fail.to_string(), "fail");
+        assert_eq!(CIStatus::NoChecks.to_string(), "no-checks");
+    }
+
+    #[test]
+    fn ci_status_equality() {
+        assert_eq!(CIStatus::Pass, CIStatus::Pass);
+        assert_ne!(CIStatus::Pass, CIStatus::Fail);
+        assert_ne!(CIStatus::Pending, CIStatus::NoChecks);
+    }
+
+    #[test]
+    fn ci_check_serialize_roundtrip() {
+        let check = CICheck {
+            name: "build".into(),
+            status: "success".into(),
+            url: Some("https://github.com/org/repo/actions/runs/123".into()),
+        };
+        let json = serde_json::to_string(&check).unwrap();
+        let parsed: CICheck = serde_json::from_str(&json).unwrap();
+        assert_eq!(parsed.name, "build");
+        assert_eq!(parsed.status, "success");
+        assert!(parsed.url.is_some());
+    }
+
+    #[test]
+    fn ci_check_without_url() {
+        let check = CICheck {
+            name: "lint".into(),
+            status: "pending".into(),
+            url: None,
+        };
+        let json = serde_json::to_string(&check).unwrap();
+        let parsed: CICheck = serde_json::from_str(&json).unwrap();
+        assert!(parsed.url.is_none());
+    }
+
+    #[test]
+    fn ci_poll_result_from_empty_checks() {
+        let result = CIPollResult::from_checks(vec![]);
+        assert_eq!(result.status, CIStatus::NoChecks);
+        assert!(result.checks.is_empty());
+    }
+
+    #[test]
+    fn ci_poll_result_all_passing() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "test".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "lint".into(),
+                status: "success".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Pass);
+        assert!(result.summary.contains("3 passed"));
+        assert!(result.summary.contains("0 failed"));
+    }
+
+    #[test]
+    fn ci_poll_result_with_failure() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "test".into(),
+                status: "failure".into(),
+                url: Some("https://example.com/run/456".into()),
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Fail);
+        assert!(result.summary.contains("1 failed"));
+    }
+
+    #[test]
+    fn ci_poll_result_pending_takes_priority() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "failure".into(),
+                url: None,
+            },
+            CICheck {
+                name: "test".into(),
+                status: "pending".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Pending); // pending outranks the failure
+    }
+
+    #[test]
+    fn ci_poll_result_queued_counts_as_pending() {
+        let checks = vec![CICheck {
+            name: "deploy".into(),
+            status: "queued".into(),
+            url: None,
+        }];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Pending); // "queued" is treated like "pending"
+    }
+
+    #[test]
+    fn ci_poll_result_error_counts_as_failure() {
+        let checks = vec![CICheck {
+            name: "build".into(),
+            status: "error".into(),
+            url: None,
+        }];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Fail); // "error" is treated like "failure"
+    }
+
+    #[test]
+    fn ci_poll_result_cancelled_counts_as_failure() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "deploy".into(),
+                status: "cancelled".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Fail); // "cancelled" is treated like "failure"
+    }
+
+    #[test]
+    fn ci_poll_result_serialize_roundtrip() {
+        let result = CIPollResult {
+            status: CIStatus::Pass,
+            checks: vec![CICheck {
+                name: "test".into(),
+                status: "success".into(),
+                url: None,
+            }],
+            summary: "1 passed, 0 failed, 0 pending (total: 1)".into(), // from_checks format
+        };
+        let json = serde_json::to_string(&result).unwrap();
+        let parsed: CIPollResult = serde_json::from_str(&json).unwrap();
+        assert_eq!(parsed.status, CIStatus::Pass);
+        assert_eq!(parsed.checks.len(), 1);
+        assert_eq!(parsed.summary, result.summary);
+    }
+
+    #[test]
+    fn ci_poll_result_skipped_checks_count_as_pass() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "optional-lint".into(),
+                status: "skipped".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Pass); // "skipped" does not block a pass
+    }
+}
diff --git a/crates/thrum-core/src/consistency.rs b/crates/thrum-core/src/consistency.rs
deleted file mode 100644
index 7baf0ad..0000000
--- a/crates/thrum-core/src/consistency.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-use crate::task::RepoName;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::path::Path;
-
-/// Version drift and configuration consistency across repos.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ConsistencyReport {
-    pub wasmparser_versions: HashMap<String, String>,
-    pub z3_versions: HashMap<String, Z3Config>,
-    pub rules_rust_versions: HashMap<String, String>,
-    pub rust_editions: HashMap<String, String>,
-    pub issues: Vec<ConsistencyIssue>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Z3Config {
-    pub crate_version: Option<String>,
-    pub bazel_version: Option<String>,
-    pub is_forked: bool,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum ConsistencyIssue {
-    VersionDrift {
-        dep: String,
-        versions: HashMap<String, String>,
-    },
-    UnpinnedDependency {
-        repo: String,
-        dep: String,
-        detail: String,
-    },
-    ProofToolchainMismatch {
-        repos: Vec<String>,
-        detail: String,
-    },
-    DuplicatedDefinition {
-        name: String,
-        locations: Vec<(String, String)>,
-    },
-}
-
-impl std::fmt::Display for ConsistencyIssue {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            ConsistencyIssue::VersionDrift { dep, versions } => {
-                write!(f, "Version drift for '{dep}': ")?;
-                for (repo, ver) in versions {
-                    write!(f, "{repo}={ver} ")?;
-                }
-                Ok(())
-            }
-            ConsistencyIssue::UnpinnedDependency { repo, dep, detail } => {
-                write!(f, "Unpinned dep '{dep}' in {repo}: {detail}")
-            }
-            ConsistencyIssue::ProofToolchainMismatch { repos, detail } => {
-                write!(f, "Proof toolchain mismatch across {repos:?}: {detail}")
-            }
-            ConsistencyIssue::DuplicatedDefinition { name, locations } => {
-                write!(f, "Duplicated definition '{name}' at: ")?;
-                for (repo, path) in locations {
-                    write!(f, "{repo}:{path} ")?;
-                }
-                Ok(())
-            }
-        }
-    }
-}
-
-/// Check consistency across all repos by parsing their Cargo.toml files.
-pub fn check_consistency(
-    repo_paths: &HashMap<RepoName, &Path>,
-) -> anyhow::Result<ConsistencyReport> {
-    let mut wasmparser_versions = HashMap::new();
-    let mut z3_versions = HashMap::new();
-    let mut rules_rust_versions = HashMap::new();
-    let mut rust_editions = HashMap::new();
-    let mut issues = Vec::new();
-
-    for (name, path) in repo_paths {
-        let repo_label = name.to_string();
-        let cargo_path = path.join("Cargo.toml");
-
-        if !cargo_path.exists() {
-            tracing::warn!(?cargo_path, "Cargo.toml not found, skipping");
-            continue;
-        }
-
-        let manifest = cargo_toml::Manifest::from_path(&cargo_path)?;
-
-        // Extract edition
-        if let Some(pkg) = &manifest.package {
-            let edition = &pkg.edition;
-            // Inheritable<Edition> — get the value if set explicitly
-            if let Ok(ed) = edition.get() {
-                let edition_str = format!("{ed:?}");
-                rust_editions.insert(repo_label.clone(), edition_str);
-            }
-        }
-
-        // Check workspace dependencies if present
-        let deps = if let Some(ref ws) = manifest.workspace {
-            ws.dependencies.clone()
-        } else {
-            manifest.dependencies.clone()
-        };
-
-        for (dep_name, dep) in &deps {
-            let version_str = match dep {
-                cargo_toml::Dependency::Simple(v) => v.clone(),
-                cargo_toml::Dependency::Inherited(_) => continue,
-                cargo_toml::Dependency::Detailed(d) => d.version.clone().unwrap_or_default(),
-            };
-
-            match dep_name.as_str() {
-                "wasmparser" => {
-                    wasmparser_versions.insert(repo_label.clone(), version_str);
-                }
-                "z3" | "z3-sys" => {
-                    let config =
-                        z3_versions
-                            .entry(repo_label.clone())
-                            .or_insert_with(|| Z3Config {
-                                crate_version: None,
-                                bazel_version: None,
-                                is_forked: false,
-                            });
-                    config.crate_version = Some(version_str);
-                }
-                "rules_rust" => {
-                    rules_rust_versions.insert(repo_label.clone(), version_str);
-                }
-                _ => {}
-            }
-        }
-    }
-
-    // Detect version drift for wasmparser
-    if wasmparser_versions
-        .values()
-        .collect::<std::collections::HashSet<_>>()
-        .len()
-        > 1
-    {
-        issues.push(ConsistencyIssue::VersionDrift {
-            dep: "wasmparser".into(),
-            versions: wasmparser_versions.clone(),
-        });
-    }
-
-    // Detect edition drift
-    if rust_editions
-        .values()
-        .collect::<std::collections::HashSet<_>>()
-        .len()
-        > 1
-    {
-        issues.push(ConsistencyIssue::VersionDrift {
-            dep: "rust-edition".into(),
-            versions: rust_editions.clone(),
-        });
-    }
-
-    Ok(ConsistencyReport {
-        wasmparser_versions,
-        z3_versions,
-        rules_rust_versions,
-        rust_editions,
-        issues,
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn issue_display() {
-        let issue = ConsistencyIssue::VersionDrift {
-            dep: "wasmparser".into(),
-            versions: HashMap::from([
-                ("loom".into(), "0.241".into()),
-                ("synth".into(), "0.219".into()),
-            ]),
-        };
-        let s = issue.to_string();
-        assert!(s.contains("wasmparser"));
-        assert!(s.contains("0.241"));
-    }
-}
diff --git a/crates/thrum-core/src/convergence.rs b/crates/thrum-core/src/convergence.rs
index 1289397..7cec0dd 100644
--- a/crates/thrum-core/src/convergence.rs
+++ b/crates/thrum-core/src/convergence.rs
@@ -334,12 +334,8 @@ mod tests {
             level: GateLevel::Quality,
             checks: checks
                 .into_iter()
-                .map(|(name, passed, stderr)| CheckResult {
-                    name: name.to_string(),
-                    passed,
-                    stdout: String::new(),
-                    stderr: stderr.to_string(),
-                    exit_code: if passed { 0 } else { 1 },
+                .map(|(name, passed, stderr)| {
+                    CheckResult::simple(name, passed, "", stderr, if passed { 0 } else { 1 })
                 })
                 .collect(),
             passed: false,
diff --git a/crates/thrum-core/src/dependency.rs b/crates/thrum-core/src/dependency.rs
new file mode 100644
index 0000000..ca8020d
--- /dev/null
+++ b/crates/thrum-core/src/dependency.rs
@@ -0,0 +1,810 @@
+//! Task dependency graph, topological ordering, and conflict detection.
+//!
+//! Provides:
+//! 1. **Dependency graph** — DAG of task prerequisites with cycle detection.
+//! 2. **Topological ordering** — dispatch tasks in dependency order.
+//! 3. **Batch barriers** — group tasks into batches that must all complete
+//!    before the next batch starts.
+//! 4. **Post-merge compilation checks** — verify the repo still compiles
+//!    after parallel merges before dispatching the next batch.
+//! 5. **Conflict analysis** — predict file-level conflicts between tasks
+//!    based on their planned file lists (from specs/plans).
+
+use crate::task::{Task, TaskId};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet, VecDeque};
+
+// ---------------------------------------------------------------------------
+// Dependency types
+// ---------------------------------------------------------------------------
+
+/// The kind of dependency between two tasks.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum DependencyKind {
+    /// Task B must not start until task A is merged.
+    /// This is the `#[default]` variant and is displayed as `hard`.
+    #[default]
+    MustFinishBefore,
+    /// Soft dependency (displayed as `soft`): B *should* run after A, but may proceed if A is stuck.
+    /// NOTE(review): `from_tasks` ignores `kind`, so `is_ready` treats soft edges as blocking.
+    SoftOrder,
+}
+
+/// A single dependency edge, stored on the dependent task: "this task depends on `prerequisite`".
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskDependency {
+    /// The prerequisite task that must finish first.
+    pub prerequisite: TaskId,
+    /// Hard or soft; when absent from serialized data it defaults to `MustFinishBefore`.
+    #[serde(default)]
+    pub kind: DependencyKind,
+}
+
+impl TaskDependency {
+    /// Creates a hard dependency: `prerequisite` must finish before the
+    /// dependent task starts (`DependencyKind::MustFinishBefore`).
+    pub fn hard(prerequisite: TaskId) -> Self {
+        let kind = DependencyKind::MustFinishBefore;
+        Self { prerequisite, kind }
+    }
+
+    /// Creates a soft dependency: an ordering preference on `prerequisite`
+    /// (`DependencyKind::SoftOrder`).
+    pub fn soft(prerequisite: TaskId) -> Self {
+        let kind = DependencyKind::SoftOrder;
+        Self { prerequisite, kind }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Batch barrier
+// ---------------------------------------------------------------------------
+
+/// A batch barrier groups tasks so that all tasks in a batch must complete
+/// (reach Merged status) before tasks in the next batch are dispatched.
+///
+/// Barriers create sync points after parallel merges, enabling post-merge
+/// compilation checks before the next wave of work.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BatchBarrier {
+    /// Human-readable batch name (e.g. "phase-1", "foundation"); uniqueness is not enforced here.
+    pub name: String,
+    /// Ordering index, lower = earlier; `eligible_for_dispatch` groups tasks by this value.
+    pub order: u32,
+    /// When this barrier was created.
+    pub created_at: DateTime<Utc>,
+}
+
+impl BatchBarrier {
+    /// Creates a barrier with the given name and order, stamped with `Utc::now()`.
+    pub fn new(name: impl Into<String>, order: u32) -> Self {
+        Self {
+            name: name.into(),
+            order,
+            created_at: Utc::now(),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Post-merge check result
+// ---------------------------------------------------------------------------
+
+/// Result of a post-merge compilation check between batch dispatches.
+///
+/// After a batch of parallel tasks is merged, the engine runs the repo's
+/// build command to verify that the combined result still compiles, i.e.
+/// that the merged changes did not break each other's cross-references.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PostMergeCheck {
+    /// Which repo was checked.
+    pub repo: crate::task::RepoName,
+    /// Whether the build succeeded.
+    pub passed: bool,
+    /// Stdout/stderr from the build.
+    pub output: String,
+    /// How long the check took, in seconds.
+    pub duration_secs: f64,
+    /// Which batch just completed (if batch barriers are in use).
+    pub after_batch: Option<String>,
+    /// When the check ran.
+    pub checked_at: DateTime<Utc>,
+}
+
+// ---------------------------------------------------------------------------
+// Conflict prediction
+// ---------------------------------------------------------------------------
+
+/// A predicted file conflict between two tasks that haven't started yet.
+///
+/// Unlike [`FileConflict`](crate::coordination::FileConflict) which detects
+/// conflicts at runtime between running agents, this predicts conflicts
+/// based on planned file lists from task specs/plans.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PredictedConflict {
+    /// The overlapping file path, as written in the specs' `affected_files` (not normalized).
+    pub path: std::path::PathBuf,
+    /// First task that plans to touch this file.
+    pub task_a: TaskId,
+    /// Second task that plans to touch this file.
+    pub task_b: TaskId,
+    /// How likely this overlap is to cause a real merge conflict.
+    pub severity: ConflictSeverity,
+}
+
+/// How severe a predicted conflict is; `Display` renders `high`, `medium` or `low`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ConflictSeverity {
+    /// Both tasks modify the same file — likely merge conflict. Only level `predict_conflicts` emits.
+    High,
+    /// One task creates a file the other modifies — possible conflict.
+    Medium,
+    /// Both tasks read/reference the same file — probably fine.
+    Low,
+}
+
+// ---------------------------------------------------------------------------
+// Dependency graph
+// ---------------------------------------------------------------------------
+
+/// A directed graph of task dependencies, keyed by raw task id.
+///
+/// Pure data structure — no async, no persistence. Built from a snapshot
+/// of tasks; acyclicity is expected but not enforced (see `find_cycle`).
+#[derive(Debug, Default)]
+pub struct DependencyGraph {
+    /// Adjacency list: task_id → set of tasks it depends on.
+    deps: HashMap<i64, HashSet<i64>>,
+    /// Reverse adjacency: task_id → set of tasks that depend on it.
+    rdeps: HashMap<i64, HashSet<i64>>,
+    /// All known task IDs.
+    nodes: HashSet<i64>,
+}
+
+impl DependencyGraph {
+    /// Create an empty graph.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Build a dependency graph from a list of tasks.
+    /// Every `depends_on` entry becomes an edge, whatever its `kind`.
+    pub fn from_tasks(tasks: &[Task]) -> Self {
+        let mut graph = Self::new();
+        for task in tasks {
+            graph.nodes.insert(task.id.0);
+            for dep in &task.depends_on {
+                graph.add_edge(task.id.0, dep.prerequisite.0);
+            }
+        }
+        graph
+    }
+
+    /// Add a dependency edge: `dependent` depends on `prerequisite`.
+    /// Both ids are registered as nodes. Re-adding an existing edge is a no-op.
+    pub fn add_edge(&mut self, dependent: i64, prerequisite: i64) {
+        self.nodes.insert(dependent);
+        self.nodes.insert(prerequisite);
+        self.deps.entry(dependent).or_default().insert(prerequisite);
+        self.rdeps
+            .entry(prerequisite)
+            .or_default()
+            .insert(dependent);
+    }
+
+    /// Check if a specific task has all its dependencies satisfied.
+    ///
+    /// A dependency is "satisfied" if the prerequisite task is in the
+    /// `completed` set (i.e., it has reached Merged status). An id with no
+    /// recorded prerequisites (including an unknown id) is always ready.
+    pub fn is_ready(&self, task_id: i64, completed: &HashSet<i64>) -> bool {
+        match self.deps.get(&task_id) {
+            None => true,
+            Some(prerequisites) => prerequisites.iter().all(|p| completed.contains(p)),
+        }
+    }
+
+    /// Get all tasks that are ready to run (not completed, all dependencies
+    /// satisfied), in ascending id order.
+    pub fn ready_tasks(&self, completed: &HashSet<i64>) -> Vec<i64> {
+        let mut ready: Vec<i64> = self
+            .nodes
+            .iter()
+            .filter(|id| !completed.contains(id) && self.is_ready(**id, completed))
+            .copied()
+            .collect();
+        ready.sort_unstable();
+        ready
+    }
+
+    /// Detect cycles using DFS with coloring.
+    ///
+    /// Returns `None` if the graph is a valid DAG. Otherwise returns
+    /// `Some(cycle)`, where `cycle` contains only nodes on one cycle, follows
+    /// dependency edges, and repeats its first id at the end (e.g. `[2, 3, 2]`).
+    /// Which cycle is reported, and where it starts, is unspecified.
+    pub fn find_cycle(&self) -> Option<Vec<i64>> {
+        // 0 = white (unvisited), 1 = gray (in progress), 2 = black (done)
+        let mut colors: HashMap<i64, u8> = self.nodes.iter().map(|&id| (id, 0u8)).collect();
+        let mut path = Vec::new();
+
+        for &node in &self.nodes {
+            if colors[&node] == 0 && self.dfs_cycle(node, &mut colors, &mut path) {
+                return Some(path);
+            }
+        }
+        None
+    }
+
+    /// DFS helper for cycle detection.
+    /// Colors: 0 = white (unvisited), 1 = gray (in progress), 2 = black (done).
+    ///
+    /// `path` holds the gray nodes in visit order. When this returns `true`,
+    /// `path` has been trimmed to the cycle itself, first id repeated last.
+    fn dfs_cycle(&self, node: i64, colors: &mut HashMap<i64, u8>, path: &mut Vec<i64>) -> bool {
+        colors.insert(node, 1); // gray
+        path.push(node);
+
+        if let Some(deps) = self.deps.get(&node) {
+            for &dep in deps {
+                match colors.get(&dep).copied() {
+                    Some(1) => {
+                        // Back edge: `dep` is gray, hence already on `path`. Drop the
+                        // lead-in before it so that only the cycle is reported.
+                        let start = path.iter().position(|&n| n == dep).unwrap_or(0);
+                        path.drain(..start);
+                        path.push(dep);
+                        return true;
+                    }
+                    Some(0) | None => {
+                        if self.dfs_cycle(dep, colors, path) {
+                            return true;
+                        }
+                    }
+                    _ => {
+                        // Already fully explored (black), no cycle through this node
+                    }
+                }
+            }
+        }
+
+        colors.insert(node, 2); // black
+        path.pop();
+        false
+    }
+
+    /// Topological sort using Kahn's algorithm.
+    ///
+    /// Returns tasks in an order where all dependencies come before dependents;
+    /// ties are broken by ascending id, so the result is deterministic.
+    ///
+    /// # Errors
+    /// On a cycle, returns (ascending) every id that could not be ordered:
+    /// the nodes on a cycle plus any node that transitively depends on one.
+    pub fn topological_sort(&self) -> Result<Vec<i64>, Vec<i64>> {
+        // deps[A] = {B, C} means A depends on B and C, i.e. edges B→A and C→A,
+        // so A's in-degree is its number of prerequisites.
+        let mut in_degree: HashMap<i64, usize> = self.nodes.iter().map(|&id| (id, 0)).collect();
+        for (&dependent, prerequisites) in &self.deps {
+            *in_degree.entry(dependent).or_insert(0) += prerequisites.len();
+        }
+
+        // Seed the queue with the dependency-free nodes, lowest id first.
+        let mut queue: VecDeque<i64> = self.ready_tasks(&HashSet::new()).into();
+        let mut result = Vec::with_capacity(self.nodes.len());
+
+        while let Some(node) = queue.pop_front() {
+            result.push(node);
+
+            // Release dependents in ascending id order to keep the output deterministic.
+            let mut dependents = self.dependents(node);
+            dependents.sort_unstable();
+            for dependent in dependents {
+                if let Some(deg) = in_degree.get_mut(&dependent) {
+                    *deg -= 1;
+                    if *deg == 0 {
+                        queue.push_back(dependent);
+                    }
+                }
+            }
+        }
+
+        if result.len() == self.nodes.len() {
+            return Ok(result);
+        }
+
+        // Whatever was never emitted is on, or downstream of, a cycle.
+        let emitted: HashSet<i64> = result.iter().copied().collect();
+        let mut remaining: Vec<i64> = self.nodes.difference(&emitted).copied().collect();
+        remaining.sort_unstable();
+        Err(remaining)
+    }
+
+    /// Get the direct dependencies (prerequisites) for a task.
+    pub fn prerequisites(&self, task_id: i64) -> Vec<i64> {
+        self.deps
+            .get(&task_id)
+            .map(|s| s.iter().copied().collect())
+            .unwrap_or_default()
+    }
+
+    /// Get the direct dependents (tasks that depend on this one).
+    pub fn dependents(&self, task_id: i64) -> Vec<i64> {
+        self.rdeps
+            .get(&task_id)
+            .map(|s| s.iter().copied().collect())
+            .unwrap_or_default()
+    }
+
+    /// Number of tasks in the graph.
+    pub fn node_count(&self) -> usize {
+        self.nodes.len()
+    }
+
+    /// Number of dependency edges.
+    pub fn edge_count(&self) -> usize {
+        self.deps.values().map(|s| s.len()).sum()
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Conflict predictor
+// ---------------------------------------------------------------------------
+
+/// Predicts file-level conflicts between tasks based on their planned file lists.
+///
+/// Uses the file lists from task specs (if available); tasks without a spec are
+/// skipped and tasks are only paired within the same repo. Returns one
+/// `ConflictSeverity::High` entry per shared file and task pair, ordered by path.
+/// A file that one task lists twice is counted once (no self-conflicts).
+pub fn predict_conflicts(tasks: &[Task]) -> Vec<PredictedConflict> {
+    let mut file_to_tasks: HashMap<String, Vec<&Task>> = HashMap::new();
+
+    for task in tasks {
+        if let Some(ref spec) = task.spec {
+            for file in &spec.design.affected_files {
+                let touching = file_to_tasks.entry(file.clone()).or_default();
+                // A spec may repeat a path; record each task at most once per file.
+                if !touching.iter().any(|seen| std::ptr::eq(*seen, task)) {
+                    touching.push(task);
+                }
+            }
+        }
+    }
+
+    let mut conflicts = Vec::new();
+
+    for (path, touching_tasks) in &file_to_tasks {
+        // Generate all pairs (nothing to do when fewer than two tasks touch the file)
+        for (i, a) in touching_tasks.iter().enumerate() {
+            for b in &touching_tasks[i + 1..] {
+                // Only flag conflicts for tasks in the same repo
+                if a.repo != b.repo {
+                    continue;
+                }
+                conflicts.push(PredictedConflict {
+                    path: std::path::PathBuf::from(path),
+                    task_a: a.id.clone(),
+                    task_b: b.id.clone(),
+                    severity: ConflictSeverity::High,
+                });
+            }
+        }
+    }
+
+    // `file_to_tasks` is a HashMap, so order by path for a deterministic result.
+    conflicts.sort_by(|x, y| x.path.cmp(&y.path));
+    conflicts
+}
+
+/// Check whether two tasks in the same batch should be serialized
+/// because they have overlapping planned file lists.
+///
+/// Returns `true` only when both tasks are in the same repo, both have a
+/// spec, and the two `design.affected_files` lists share at least one entry.
+/// Entries are compared as written; no path normalization is applied.
+pub fn tasks_have_file_overlap(task_a: &Task, task_b: &Task) -> bool {
+    if task_a.repo != task_b.repo {
+        return false;
+    }
+
+    match (&task_a.spec, &task_b.spec) {
+        (Some(spec_a), Some(spec_b)) => {
+            let planned_a: HashSet<_> = spec_a.design.affected_files.iter().collect();
+            let mut planned_b = spec_b.design.affected_files.iter();
+            planned_b.any(|file| planned_a.contains(file))
+        }
+        // A task without a spec has no planned files, hence nothing to overlap.
+        _ => false,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Batch scheduling
+// ---------------------------------------------------------------------------
+
+/// Given a set of tasks with batch barriers, return the tasks eligible
+/// for dispatch in the current batch.
+///
+/// * `tasks` - snapshot of the tasks to consider.
+/// * `completed_ids` - raw ids of tasks that have already completed.
+///
+/// Tasks whose id is in `completed_ids` are never returned. Of the rest:
+///
+/// - tasks without a batch barrier are always eligible ("unbarriered");
+/// - tasks with a barrier are eligible only if they belong to the current
+///   batch, i.e. the lowest `order` that still has an incomplete task in
+///   `tasks`. Later batches wait until every earlier batch has completed.
+///
+/// Batches are identified by `BatchBarrier::order` alone; barrier names are
+/// not consulted, and task dependencies are not considered here.
+///
+/// The result lists unbarriered tasks first, then the current batch, each
+/// group in input order. For example, with batch 0 = {1, 2} and batch 1 = {3}:
+/// nothing completed → `[1, 2]`; 1 completed → `[2]`; 1 and 2 completed → `[3]`.
+///
+/// NOTE: a barriered task that never reaches `completed_ids` (for example one
+/// that failed) keeps every later batch waiting indefinitely.
+pub fn eligible_for_dispatch(tasks: &[Task], completed_ids: &HashSet<i64>) -> Vec<TaskId> {
+    // Split the incomplete tasks into unbarriered ids and (order, task) pairs.
+    let mut unbatched: Vec<TaskId> = Vec::new();
+    let mut barriered: Vec<(u32, &Task)> = Vec::new();
+
+    for task in tasks {
+        if completed_ids.contains(&task.id.0) {
+            continue;
+        }
+        match &task.batch_barrier {
+            Some(barrier) => barriered.push((barrier.order, task)),
+            None => unbatched.push(task.id.clone()),
+        }
+    }
+
+    let mut eligible = unbatched;
+
+    // Completed tasks were skipped above, so every order present in
+    // `barriered` still has unfinished work. The lowest such order is
+    // therefore the active batch; all higher orders must keep waiting.
+    let current_order = barriered.iter().map(|&(order, _)| order).min();
+    if let Some(current) = current_order {
+        eligible.extend(
+            barriered
+                .iter()
+                .filter(|&&(order, _)| order == current)
+                .map(|&(_, task)| task.id.clone()),
+        );
+    }
+
+    eligible
+}
+
+// ---------------------------------------------------------------------------
+// Display helpers
+// ---------------------------------------------------------------------------
+
+impl std::fmt::Display for DependencyKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::MustFinishBefore => "hard", // blocking prerequisite
+            Self::SoftOrder => "soft",
+        })
+    }
+}
+
+impl std::fmt::Display for ConflictSeverity {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::High => "high", // labels are the lowercase variant names
+            Self::Medium => "medium",
+            Self::Low => "low",
+        })
+    }
+}
+
+impl std::fmt::Display for BatchBarrier {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "batch[{}] order={}", self.name, self.order) // `created_at` is not shown
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::task::{RepoName, Task};
+    /// Builds a task in repo "test" titled "Task {id}" and overrides its id with `id`.
+    fn make_task(id: i64) -> Task {
+        let mut task = Task::new(RepoName::new("test"), format!("Task {id}"), "desc".into());
+        task.id = TaskId(id);
+        task
+    }
+    /// Like `make_task`, plus one hard dependency per entry in `deps`.
+    fn make_task_with_deps(id: i64, deps: Vec<i64>) -> Task {
+        let mut task = make_task(id);
+        task.depends_on = deps
+            .into_iter()
+            .map(|d| TaskDependency::hard(TaskId(d)))
+            .collect();
+        task
+    }
+
+    #[test]
+    fn empty_graph_has_no_cycles() {
+        let graph = DependencyGraph::new();
+        assert!(graph.find_cycle().is_none());
+    }
+
+    #[test]
+    fn linear_chain_no_cycle() {
+        let tasks = vec![
+            make_task(1),
+            make_task_with_deps(2, vec![1]),
+            make_task_with_deps(3, vec![2]),
+        ];
+        let graph = DependencyGraph::from_tasks(&tasks);
+        assert!(graph.find_cycle().is_none());
+        assert_eq!(graph.node_count(), 3);
+        assert_eq!(graph.edge_count(), 2);
+    }
+
+    #[test]
+    fn cycle_detected() {
+        let mut graph = DependencyGraph::new();
+        graph.add_edge(1, 2);
+        graph.add_edge(2, 3);
+        graph.add_edge(3, 1); // closes the loop: 1 depends on 2, 2 on 3, 3 on 1
+        assert!(graph.find_cycle().is_some());
+    }
+
+    #[test]
+    fn topological_sort_linear() {
+        let tasks = vec![
+            make_task(1),
+            make_task_with_deps(2, vec![1]),
+            make_task_with_deps(3, vec![2]),
+        ];
+        let graph = DependencyGraph::from_tasks(&tasks);
+        let order = graph.topological_sort().unwrap();
+        assert_eq!(order, vec![1, 2, 3]);
+    }
+
+    #[test]
+    fn topological_sort_diamond() {
+        // Diamond: 2 and 3 each depend on 1; 4 depends on both 2 and 3.
+        let tasks = vec![
+            make_task(1),
+            make_task_with_deps(2, vec![1]),
+            make_task_with_deps(3, vec![1]),
+            make_task_with_deps(4, vec![2, 3]),
+        ];
+        let graph = DependencyGraph::from_tasks(&tasks);
+        let order = graph.topological_sort().unwrap();
+
+        // 1 must come before 2 and 3; 2 and 3 must come before 4
+        let pos = |id: i64| order.iter().position(|&x| x == id).unwrap();
+        assert!(pos(1) < pos(2));
+        assert!(pos(1) < pos(3));
+        assert!(pos(2) < pos(4));
+        assert!(pos(3) < pos(4));
+    }
+
+    #[test]
+    fn topological_sort_cycle_returns_err() {
+        let mut graph = DependencyGraph::new();
+        graph.add_edge(1, 2);
+        graph.add_edge(2, 1);
+        assert!(graph.topological_sort().is_err());
+    }
+
+    #[test]
+    fn is_ready_no_deps() {
+        let graph = DependencyGraph::from_tasks(&[make_task(1)]);
+        assert!(graph.is_ready(1, &HashSet::new()));
+    }
+
+    #[test]
+    fn is_ready_deps_not_met() {
+        let tasks = vec![make_task(1), make_task_with_deps(2, vec![1])];
+        let graph = DependencyGraph::from_tasks(&tasks);
+        assert!(!graph.is_ready(2, &HashSet::new()));
+    }
+
+    #[test]
+    fn is_ready_deps_met() {
+        let tasks = vec![make_task(1), make_task_with_deps(2, vec![1])];
+        let graph = DependencyGraph::from_tasks(&tasks);
+        let completed: HashSet<i64> = [1].into();
+        assert!(graph.is_ready(2, &completed));
+    }
+
+    #[test]
+    fn ready_tasks_filters_correctly() {
+        let tasks = vec![make_task(1), make_task_with_deps(2, vec![1]), make_task(3)];
+        let graph = DependencyGraph::from_tasks(&tasks);
+
+        let ready = graph.ready_tasks(&HashSet::new());
+        assert!(ready.contains(&1));
+        assert!(ready.contains(&3));
+        assert!(!ready.contains(&2));
+
+        let ready_after = graph.ready_tasks(&[1].into());
+        assert!(ready_after.contains(&2));
+        assert!(ready_after.contains(&3));
+    }
+
+    #[test]
+    fn prerequisites_and_dependents() {
+        let tasks = vec![
+            make_task(1),
+            make_task_with_deps(2, vec![1]),
+            make_task_with_deps(3, vec![1]),
+        ];
+        let graph = DependencyGraph::from_tasks(&tasks);
+
+        assert!(graph.prerequisites(1).is_empty());
+        let mut deps_of_2 = graph.prerequisites(2);
+        deps_of_2.sort();
+        assert_eq!(deps_of_2, vec![1]);
+
+        let mut dependents_of_1 = graph.dependents(1);
+        dependents_of_1.sort();
+        assert_eq!(dependents_of_1, vec![2, 3]);
+    }
+
+    #[test]
+    fn batch_barrier_display() {
+        let barrier = BatchBarrier::new("phase-1", 0);
+        let s = barrier.to_string();
+        assert!(s.contains("phase-1"));
+        assert!(s.contains("order=0"));
+    }
+
+    #[test]
+    fn eligible_for_dispatch_unbatched() {
+        let tasks = vec![make_task(1), make_task(2), make_task(3)];
+        let eligible = eligible_for_dispatch(&tasks, &HashSet::new());
+        assert_eq!(eligible.len(), 3);
+    }
+
+    #[test]
+    fn eligible_for_dispatch_respects_batch_order() {
+        let mut t1 = make_task(1);
+        t1.batch_barrier = Some(BatchBarrier::new("batch-0", 0));
+        let mut t2 = make_task(2);
+        t2.batch_barrier = Some(BatchBarrier::new("batch-0", 0));
+        let mut t3 = make_task(3);
+        t3.batch_barrier = Some(BatchBarrier::new("batch-1", 1));
+
+        let tasks = vec![t1, t2, t3];
+
+        // Nothing completed yet: only the batch-0 tasks (order 0) should be eligible
+        let eligible = eligible_for_dispatch(&tasks, &HashSet::new());
+        let eligible_ids: HashSet<i64> = eligible.iter().map(|t| t.0).collect();
+        assert!(eligible_ids.contains(&1));
+        assert!(eligible_ids.contains(&2));
+        assert!(!eligible_ids.contains(&3));
+
+        // After batch-0 completes, batch-1 tasks become eligible
+        let completed: HashSet<i64> = [1, 2].into();
+        let eligible_after = eligible_for_dispatch(&tasks, &completed);
+        let eligible_ids: HashSet<i64> = eligible_after.iter().map(|t| t.0).collect();
+        assert!(eligible_ids.contains(&3));
+    }
+
+    #[test]
+    fn predict_conflicts_same_file() {
+        use std::path::PathBuf;
+
+        let mut t1 = make_task(1);
+        t1.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into(), "src/main.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let mut t2 = make_task(2);
+        t2.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into(), "src/other.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let conflicts = predict_conflicts(&[t1, t2]);
+        assert_eq!(conflicts.len(), 1); // only src/lib.rs is listed by both tasks
+        assert_eq!(conflicts[0].path, PathBuf::from("src/lib.rs"));
+    }
+
+    #[test]
+    fn predict_conflicts_different_repos_no_conflict() {
+        let mut t1 = Task::new(RepoName::new("loom"), "Task 1".into(), "d".into());
+        t1.id = TaskId(1);
+        t1.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let mut t2 = Task::new(RepoName::new("synth"), "Task 2".into(), "d".into());
+        t2.id = TaskId(2);
+        t2.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let conflicts = predict_conflicts(&[t1, t2]);
+        assert!(conflicts.is_empty());
+    }
+
+    #[test]
+    fn tasks_have_file_overlap_true() {
+        let mut t1 = make_task(1);
+        t1.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let mut t2 = make_task(2);
+        t2.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/lib.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        assert!(tasks_have_file_overlap(&t1, &t2));
+    }
+
+    #[test]
+    fn tasks_have_file_overlap_false() {
+        let mut t1 = make_task(1);
+        t1.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/foo.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        let mut t2 = make_task(2);
+        t2.spec = Some(crate::spec::Spec {
+            design: crate::spec::DesignSpec {
+                affected_files: vec!["src/bar.rs".into()],
+                ..Default::default()
+            },
+            ..crate::spec::Spec::default()
+        });
+
+        assert!(!tasks_have_file_overlap(&t1, &t2));
+    }
+
+    #[test]
+    fn dependency_kind_display() {
+        assert_eq!(DependencyKind::MustFinishBefore.to_string(), "hard");
+        assert_eq!(DependencyKind::SoftOrder.to_string(), "soft");
+    }
+
+    #[test]
+    fn conflict_severity_display() {
+        assert_eq!(ConflictSeverity::High.to_string(), "high");
+        assert_eq!(ConflictSeverity::Medium.to_string(), "medium");
+        assert_eq!(ConflictSeverity::Low.to_string(), "low");
+    }
+}
diff --git a/crates/thrum-core/src/event.rs b/crates/thrum-core/src/event.rs
index 2ab8296..b983967 100644
--- a/crates/thrum-core/src/event.rs
+++ b/crates/thrum-core/src/event.rs
@@ -9,6 +9,7 @@
 use crate::agent::AgentId;
 use crate::checkpoint::CompletedPhase;
 use crate::coordination::{ConflictPolicy, FileConflict};
+use crate::sync::SyncTrigger;
 use crate::task::{GateLevel, RepoName, TaskId};
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
@@ -46,6 +47,9 @@ pub enum EventKind {
         agent_id: AgentId,
         task_id: TaskId,
         repo: RepoName,
+        /// Human-readable task title for display in dashboards.
+        #[serde(default)]
+        task_title: String,
     },
 
     /// A line of output from an agent subprocess.
@@ -156,6 +160,170 @@ pub enum EventKind {
         /// How many times the worst-case failure signature has been seen.
         repeated_count: u32,
     },
+
+    /// A timeout was detected and recovered from (rather than treated as failure).
+    TimeoutRecovered {
+        task_id: TaskId,
+        repo: RepoName,
+        /// Which role timed out (e.g. "implementer", "reviewer").
+        role: String,
+        /// The recovery action taken (e.g. "review-skipped-timeout", "retry-with-session").
+        recovery_action: String,
+        /// Whether the worktree had partial changes at time of timeout.
+        had_partial_changes: bool,
+    },
+
+    // -- CI status events --
+    /// CI polling started for a PR.
+    CIPollingStarted {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+        pr_url: String,
+    },
+
+    /// CI check status update (from polling).
+    CICheckUpdate {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+        /// Overall status: "pending", "pass", "fail".
+        status: String,
+        /// Summary of individual check results.
+        summary: String,
+    },
+
+    /// All CI checks passed — PR will be merged.
+    CIPassed {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+    },
+
+    /// CI checks failed — dispatching ci_fixer agent.
+    CIFailed {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+        /// Which attempt this is (1-based).
+        attempt: u32,
+        /// Max attempts allowed.
+        max_attempts: u32,
+        /// Summary of the CI failure.
+        failure_summary: String,
+    },
+
+    /// CI fixer agent pushed a fix commit and is waiting for CI re-run.
+    CIFixPushed {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+        attempt: u32,
+    },
+
+    /// CI retries exhausted — escalating to human review.
+    CIEscalated {
+        task_id: TaskId,
+        repo: RepoName,
+        pr_number: u64,
+        attempts: u32,
+        failure_summary: String,
+    },
+
+    // -- Remote sync events --
+    /// Remote sync started for a repository.
+    SyncStarted {
+        repo: RepoName,
+        trigger: SyncTrigger,
+    },
+
+    /// Remote sync completed successfully.
+    SyncCompleted {
+        repo: RepoName,
+        remote_sha: String,
+        branches_rebased: u32,
+        branches_conflicted: u32,
+        trigger: SyncTrigger,
+    },
+
+    /// A branch was rebased onto updated main.
+    BranchRebased {
+        repo: RepoName,
+        branch: String,
+        task_id: Option<TaskId>,
+        success: bool,
+        had_conflicts: bool,
+    },
+
+    /// A rebase agent was dispatched to resolve conflicts.
+    RebaseAgentDispatched {
+        repo: RepoName,
+        branch: String,
+        task_id: Option<TaskId>,
+    },
+
+    /// Remote sync failed.
+    SyncFailed {
+        repo: RepoName,
+        error: String,
+        trigger: SyncTrigger,
+    },
+
+    // -- Task dependency events --
+    /// A task was blocked from dispatch because its dependencies are not satisfied.
+    TaskBlocked {
+        task_id: TaskId,
+        /// The task IDs that are still incomplete.
+        blocked_by: Vec<TaskId>,
+    },
+
+    /// A task's dependencies are now all satisfied — it is ready for dispatch.
+    TaskUnblocked {
+        task_id: TaskId,
+        /// The dependency that was just resolved (the last blocker).
+        resolved_by: TaskId,
+    },
+
+    /// A dependency cycle was detected in the task graph.
+    DependencyCycleDetected {
+        /// The task IDs involved in the cycle.
+        cycle: Vec<TaskId>,
+    },
+
+    /// A batch barrier has been reached — all tasks in a batch completed.
+    BatchBarrierReached {
+        /// The batch barrier name.
+        batch_name: String,
+        /// Number of tasks that completed in this batch.
+        tasks_completed: u32,
+    },
+
+    /// Post-merge compilation check ran between batch dispatches.
+    PostMergeCheckCompleted {
+        repo: RepoName,
+        passed: bool,
+        /// Which batch just completed (if batch barriers are in use).
+        after_batch: Option<String>,
+        duration_secs: f64,
+    },
+
+    /// Predicted file conflict detected between planned tasks.
+    PredictedConflictDetected {
+        task_a: TaskId,
+        task_b: TaskId,
+        /// The overlapping file path.
+        path: PathBuf,
+        severity: String,
+    },
+    /// Task data was modified via dashboard actions (created, edited, or deleted).
+    /// Reactive dashboard clients use this to refresh the task list without polling.
+    TaskDataChanged,
+
+    /// Budget configuration was updated via dashboard.
+    BudgetUpdated,
+
+    /// Memory entries were modified via dashboard (cleared or decayed).
+    MemoryUpdated,
 }
 
 /// What kind of file system change was detected.
@@ -193,8 +361,17 @@ impl std::fmt::Display for PipelineEvent {
             } => write!(f, "[{ts}] {task_id} ({repo}): {from} -> {to}"),
 
             EventKind::AgentStarted {
-                agent_id, task_id, ..
-            } => write!(f, "[{ts}] {agent_id} started on {task_id}"),
+                agent_id,
+                task_id,
+                task_title,
+                ..
+            } => {
+                if task_title.is_empty() {
+                    write!(f, "[{ts}] {agent_id} started on {task_id}")
+                } else {
+                    write!(f, "[{ts}] {agent_id} started on {task_id} ({task_title})")
+                }
+            }
 
             EventKind::AgentOutput {
                 agent_id,
@@ -343,6 +520,202 @@ impl std::fmt::Display for PipelineEvent {
                 f,
                 "[{ts}] {task_id}: convergence detected (strategy={strategy}, repeats={repeated_count})"
             ),
+
+            EventKind::TimeoutRecovered {
+                task_id,
+                repo,
+                role,
+                recovery_action,
+                had_partial_changes,
+            } => {
+                let partial = if *had_partial_changes {
+                    ", partial changes preserved"
+                } else {
+                    ""
+                };
+                write!(
+                    f,
+                    "[{ts}] {task_id} ({repo}): timeout recovered ({role}: {recovery_action}{partial})"
+                )
+            }
+
+            EventKind::CIPollingStarted {
+                task_id,
+                repo,
+                pr_number,
+                ..
+            } => write!(
+                f,
+                "[{ts}] {task_id} ({repo}): CI polling started for PR #{pr_number}"
+            ),
+
+            EventKind::CICheckUpdate {
+                task_id,
+                pr_number,
+                status,
+                summary,
+                ..
+            } => write!(
+                f,
+                "[{ts}] {task_id}: CI PR #{pr_number} status={status}: {summary}"
+            ),
+
+            EventKind::CIPassed {
+                task_id, pr_number, ..
+            } => write!(f, "[{ts}] {task_id}: CI PR #{pr_number} PASSED"),
+
+            EventKind::CIFailed {
+                task_id,
+                pr_number,
+                attempt,
+                max_attempts,
+                failure_summary,
+                ..
+            } => write!(
+                f,
+                "[{ts}] {task_id}: CI PR #{pr_number} FAILED (attempt {attempt}/{max_attempts}): {failure_summary}"
+            ),
+
+            EventKind::CIFixPushed {
+                task_id,
+                pr_number,
+                attempt,
+                ..
+            } => write!(
+                f,
+                "[{ts}] {task_id}: CI fix pushed for PR #{pr_number} (attempt {attempt})"
+            ),
+
+            EventKind::CIEscalated {
+                task_id,
+                pr_number,
+                attempts,
+                failure_summary,
+                ..
+            } => write!(
+                f,
+                "[{ts}] {task_id}: CI ESCALATED for PR #{pr_number} after {attempts} attempts: {failure_summary}"
+            ),
+
+            EventKind::SyncStarted { repo, trigger } => {
+                write!(f, "[{ts}] SYNC ({repo}): started ({trigger})")
+            }
+
+            EventKind::SyncCompleted {
+                repo,
+                remote_sha,
+                branches_rebased,
+                branches_conflicted,
+                trigger,
+            } => write!(
+                f,
+                "[{ts}] SYNC ({repo}): completed ({trigger}) sha={} rebased={branches_rebased} conflicts={branches_conflicted}",
+                remote_sha.get(..7).unwrap_or(remote_sha) // short SHA; whole string if shorter or not sliceable at byte 7
+            ),
+
+            EventKind::BranchRebased {
+                repo,
+                branch,
+                success,
+                had_conflicts,
+                ..
+            } => {
+                let status = if *success {
+                    "OK"
+                } else if *had_conflicts {
+                    "CONFLICT"
+                } else {
+                    "FAIL"
+                };
+                write!(f, "[{ts}] SYNC ({repo}): rebase {branch} -> {status}")
+            }
+
+            EventKind::RebaseAgentDispatched {
+                repo,
+                branch,
+                task_id,
+                ..
+            } => {
+                let task_str = task_id
+                    .as_ref()
+                    .map(|t| format!(" {t}"))
+                    .unwrap_or_default();
+                write!(
+                    f,
+                    "[{ts}] SYNC ({repo}): rebase agent dispatched for {branch}{task_str}"
+                )
+            }
+
+            EventKind::SyncFailed {
+                repo,
+                error,
+                trigger,
+            } => write!(f, "[{ts}] SYNC ({repo}): FAILED ({trigger}): {error}"),
+
+            EventKind::TaskBlocked {
+                task_id,
+                blocked_by,
+            } => {
+                let blockers: Vec<String> = blocked_by.iter().map(|t| t.to_string()).collect();
+                write!(f, "[{ts}] {task_id}: BLOCKED by [{}]", blockers.join(", "))
+            }
+
+            EventKind::TaskUnblocked {
+                task_id,
+                resolved_by,
+            } => write!(f, "[{ts}] {task_id}: UNBLOCKED (resolved by {resolved_by})"),
+
+            EventKind::DependencyCycleDetected { cycle } => {
+                let ids: Vec<String> = cycle.iter().map(|t| t.to_string()).collect();
+                write!(f, "[{ts}] DEPENDENCY CYCLE: [{}]", ids.join(" -> "))
+            }
+
+            EventKind::BatchBarrierReached {
+                batch_name,
+                tasks_completed,
+            } => write!(
+                f,
+                "[{ts}] BATCH BARRIER reached: {batch_name} ({tasks_completed} tasks completed)"
+            ),
+
+            EventKind::PostMergeCheckCompleted {
+                repo,
+                passed,
+                after_batch,
+                duration_secs,
+            } => {
+                let status = if *passed { "PASS" } else { "FAIL" };
+                let batch = after_batch
+                    .as_deref()
+                    .map(|b| format!(" after batch {b}"))
+                    .unwrap_or_default();
+                write!(
+                    f,
+                    "[{ts}] POST-MERGE CHECK ({repo}): {status}{batch} ({duration_secs:.1}s)"
+                )
+            }
+
+            EventKind::PredictedConflictDetected {
+                task_a,
+                task_b,
+                path,
+                severity,
+            } => write!(
+                f,
+                "[{ts}] PREDICTED CONFLICT ({severity}): {} between {task_a} and {task_b}",
+                path.display()
+            ),
+            EventKind::TaskDataChanged => {
+                write!(f, "[{ts}] TASK: dashboard data changed")
+            }
+
+            EventKind::BudgetUpdated => {
+                write!(f, "[{ts}] BUDGET: updated via dashboard")
+            }
+
+            EventKind::MemoryUpdated => {
+                write!(f, "[{ts}] MEMORY: modified via dashboard")
+            }
         }
     }
 }
@@ -380,6 +753,44 @@ mod tests {
         ));
     }
 
+    #[test]
+    fn agent_started_includes_task_title() {
+        let event = PipelineEvent::new(EventKind::AgentStarted {
+            agent_id: AgentId("agent-1-loom-TASK-0001".into()),
+            task_id: TaskId(1),
+            repo: RepoName::new("loom"),
+            task_title: "Fix agent activity cards".into(),
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0001"));
+        assert!(s.contains("Fix agent activity cards"));
+
+        let json = serde_json::to_string(&event).unwrap();
+        assert!(json.contains("task_title"));
+        assert!(json.contains("Fix agent activity cards"));
+
+        let parsed: PipelineEvent = serde_json::from_str(&json).unwrap();
+        match &parsed.kind {
+            EventKind::AgentStarted { task_title, .. } => {
+                assert_eq!(task_title, "Fix agent activity cards");
+            }
+            _ => panic!("expected AgentStarted"),
+        }
+    }
+
+    #[test]
+    fn agent_started_backward_compat_no_title() {
+        // Verify deserialization works when task_title is missing (backward compat)
+        let json = r#"{"timestamp":"2026-02-18T00:00:00Z","kind":{"AgentStarted":{"agent_id":"agent-1","task_id":1,"repo":"loom"}}}"#;
+        let parsed: PipelineEvent = serde_json::from_str(json).unwrap();
+        match &parsed.kind {
+            EventKind::AgentStarted { task_title, .. } => {
+                assert!(task_title.is_empty());
+            }
+            _ => panic!("expected AgentStarted"),
+        }
+    }
+
     #[test]
     fn gate_output_display() {
         let event = PipelineEvent::new(EventKind::GateOutput {
@@ -519,4 +930,322 @@ mod tests {
         let s = event.to_string();
         assert!(s.contains("shared[api_version] = v2"));
     }
+
+    #[test]
+    fn ci_polling_started_display() {
+        let event = PipelineEvent::new(EventKind::CIPollingStarted {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+            pr_url: "https://github.com/org/loom/pull/42".into(),
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("CI polling started"));
+        assert!(s.contains("PR #42"));
+    }
+
+    #[test]
+    fn ci_check_update_display() {
+        let event = PipelineEvent::new(EventKind::CICheckUpdate {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+            status: "pending".into(),
+            summary: "2 passed, 0 failed, 1 pending (total: 3)".into(),
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("PR #42"));
+        assert!(s.contains("status=pending"));
+    }
+
+    #[test]
+    fn ci_passed_display() {
+        let event = PipelineEvent::new(EventKind::CIPassed {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("PR #42 PASSED"));
+    }
+
+    #[test]
+    fn ci_failed_display() {
+        let event = PipelineEvent::new(EventKind::CIFailed {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+            attempt: 2,
+            max_attempts: 3,
+            failure_summary: "test_neon failed".into(),
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("PR #42 FAILED"));
+        assert!(s.contains("attempt 2/3"));
+        assert!(s.contains("test_neon failed"));
+    }
+
+    #[test]
+    fn ci_fix_pushed_display() {
+        let event = PipelineEvent::new(EventKind::CIFixPushed {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+            attempt: 1,
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("CI fix pushed"));
+        assert!(s.contains("PR #42"));
+    }
+
+    #[test]
+    fn ci_escalated_display() {
+        let event = PipelineEvent::new(EventKind::CIEscalated {
+            task_id: TaskId(23),
+            repo: RepoName::new("loom"),
+            pr_number: 42,
+            attempts: 3,
+            failure_summary: "build failed".into(),
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0023"));
+        assert!(s.contains("CI ESCALATED"));
+        assert!(s.contains("PR #42"));
+        assert!(s.contains("3 attempts"));
+    }
+
+    #[test]
+    fn sync_started_display() {
+        use crate::sync::SyncTrigger;
+        let event = PipelineEvent::new(EventKind::SyncStarted {
+            repo: RepoName::new("loom"),
+            trigger: SyncTrigger::PrMerge { pr_number: 42 },
+        });
+        let s = event.to_string();
+        assert!(s.contains("SYNC (loom)"));
+        assert!(s.contains("started"));
+        assert!(s.contains("pr-merge(#42)"));
+    }
+
+    #[test]
+    fn sync_completed_display() {
+        use crate::sync::SyncTrigger;
+        let event = PipelineEvent::new(EventKind::SyncCompleted {
+            repo: RepoName::new("loom"),
+            remote_sha: "abc1234567890".into(),
+            branches_rebased: 2,
+            branches_conflicted: 1,
+            trigger: SyncTrigger::Manual,
+        });
+        let s = event.to_string();
+        assert!(s.contains("SYNC (loom)"));
+        assert!(s.contains("completed"));
+        assert!(s.contains("abc1234"));
+        assert!(s.contains("rebased=2"));
+        assert!(s.contains("conflicts=1"));
+    }
+
+    #[test]
+    fn branch_rebased_display() {
+        let event = PipelineEvent::new(EventKind::BranchRebased {
+            repo: RepoName::new("loom"),
+            branch: "auto/TASK-0001/loom/feature".into(),
+            task_id: Some(TaskId(1)),
+            success: true,
+            had_conflicts: false,
+        });
+        let s = event.to_string();
+        assert!(s.contains("SYNC (loom)"));
+        assert!(s.contains("rebase"));
+        assert!(s.contains("OK"));
+    }
+
+    #[test]
+    fn branch_rebased_conflict_display() {
+        let event = PipelineEvent::new(EventKind::BranchRebased {
+            repo: RepoName::new("synth"),
+            branch: "auto/TASK-0002/synth/fix".into(),
+            task_id: Some(TaskId(2)),
+            success: false,
+            had_conflicts: true,
+        });
+        let s = event.to_string();
+        assert!(s.contains("CONFLICT"));
+    }
+
+    #[test]
+    fn rebase_agent_dispatched_display() {
+        let event = PipelineEvent::new(EventKind::RebaseAgentDispatched {
+            repo: RepoName::new("loom"),
+            branch: "auto/TASK-0003/loom/refactor".into(),
+            task_id: Some(TaskId(3)),
+        });
+        let s = event.to_string();
+        assert!(s.contains("rebase agent dispatched"));
+        assert!(s.contains("TASK-0003"));
+    }
+
+    #[test]
+    fn sync_failed_display() {
+        use crate::sync::SyncTrigger;
+        let event = PipelineEvent::new(EventKind::SyncFailed {
+            repo: RepoName::new("loom"),
+            error: "network timeout".into(),
+            trigger: SyncTrigger::Startup,
+        });
+        let s = event.to_string();
+        assert!(s.contains("FAILED"));
+        assert!(s.contains("network timeout"));
+        assert!(s.contains("startup"));
+    }
+
+    #[test]
+    fn timeout_recovered_display() {
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(7),
+            repo: RepoName::new("loom"),
+            role: "reviewer".into(),
+            recovery_action: "review-skipped-timeout".into(),
+            had_partial_changes: false,
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0007"));
+        assert!(s.contains("timeout recovered"));
+        assert!(s.contains("reviewer: review-skipped-timeout"));
+    }
+
+    #[test]
+    fn timeout_recovered_with_partial_changes() {
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(8),
+            repo: RepoName::new("synth"),
+            role: "implementer".into(),
+            recovery_action: "retry-with-session".into(),
+            had_partial_changes: true,
+        });
+        let s = event.to_string();
+        assert!(s.contains("partial changes preserved"));
+    }
+
+    #[test]
+    fn timeout_recovered_continued_with_partial_changes() {
+        // Covers the case where agent timed out but committed changes exist
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(9),
+            repo: RepoName::new("loom"),
+            role: "implementer".into(),
+            recovery_action: "continued-with-partial-changes".into(),
+            had_partial_changes: true,
+        });
+        let s = event.to_string();
+        assert!(s.contains("TASK-0009"));
+        assert!(s.contains("continued-with-partial-changes"));
+        assert!(s.contains("partial changes preserved"));
+    }
+
+    #[test]
+    fn timeout_recovered_salvaged_partial_work() {
+        // Covers the case where agent timed out with uncommitted work saved as WIP
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(10),
+            repo: RepoName::new("synth"),
+            role: "implementer".into(),
+            recovery_action: "salvaged-partial-work".into(),
+            had_partial_changes: true,
+        });
+        let s = event.to_string();
+        assert!(s.contains("salvaged-partial-work"));
+        assert!(s.contains("partial changes preserved"));
+    }
+
+    #[test]
+    fn timeout_recovered_review_retried_reduced_scope() {
+        // Covers the review retry with reduced scope
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(11),
+            repo: RepoName::new("loom"),
+            role: "reviewer".into(),
+            recovery_action: "review-retried-reduced-scope".into(),
+            had_partial_changes: false,
+        });
+        let s = event.to_string();
+        assert!(s.contains("review-retried-reduced-scope"));
+        assert!(!s.contains("partial changes preserved"));
+    }
+
+    #[test]
+    fn timeout_recovered_serialize_roundtrip() {
+        let event = PipelineEvent::new(EventKind::TimeoutRecovered {
+            task_id: TaskId(7),
+            repo: RepoName::new("loom"),
+            role: "reviewer".into(),
+            recovery_action: "review-skipped-timeout".into(),
+            had_partial_changes: false,
+        });
+        let json = serde_json::to_string(&event).unwrap();
+        let parsed: PipelineEvent = serde_json::from_str(&json).unwrap();
+        assert!(matches!(parsed.kind, EventKind::TimeoutRecovered { .. }));
+    }
+
+    #[test]
+    fn sync_event_serialize_roundtrip() {
+        use crate::sync::SyncTrigger;
+        let event = PipelineEvent::new(EventKind::SyncStarted {
+            repo: RepoName::new("loom"),
+            trigger: SyncTrigger::Manual,
+        });
+        let json = serde_json::to_string(&event).unwrap();
+        let parsed: PipelineEvent = serde_json::from_str(&json).unwrap();
+        assert!(matches!(parsed.kind, EventKind::SyncStarted { .. }));
+    }
+
+    #[test]
+    fn task_data_changed_display() {
+        let event = PipelineEvent::new(EventKind::TaskDataChanged);
+        let s = event.to_string();
+        assert!(s.contains("TASK: dashboard data changed"));
+    }
+
+    #[test]
+    fn task_data_changed_serialize_roundtrip() {
+        let event = PipelineEvent::new(EventKind::TaskDataChanged);
+        let json = serde_json::to_string(&event).unwrap();
+        let parsed: PipelineEvent = serde_json::from_str(&json).unwrap();
+        assert!(matches!(parsed.kind, EventKind::TaskDataChanged));
+    }
+
+    #[test]
+    fn budget_updated_display() {
+        let event = PipelineEvent::new(EventKind::BudgetUpdated);
+        let s = event.to_string();
+        assert!(s.contains("BUDGET: updated via dashboard"));
+    }
+
+    #[test]
+    fn memory_updated_display() {
+        let event = PipelineEvent::new(EventKind::MemoryUpdated);
+        let s = event.to_string();
+        assert!(s.contains("MEMORY: modified via dashboard"));
+    }
+
+    #[test]
+    fn ci_event_serialize_roundtrip() {
+        let event = PipelineEvent::new(EventKind::CIPollingStarted {
+            task_id: TaskId(10),
+            repo: RepoName::new("synth"),
+            pr_number: 99,
+            pr_url: "https://github.com/org/synth/pull/99".into(),
+        });
+        let json = serde_json::to_string(&event).unwrap();
+        let parsed: PipelineEvent = serde_json::from_str(&json).unwrap();
+        assert!(matches!(
+            parsed.kind,
+            EventKind::CIPollingStarted { pr_number: 99, .. }
+        ));
+    }
 }
diff --git a/crates/thrum-core/src/gate.rs b/crates/thrum-core/src/gate.rs
index 07528d4..6c3e48e 100644
--- a/crates/thrum-core/src/gate.rs
+++ b/crates/thrum-core/src/gate.rs
@@ -1,6 +1,6 @@
-use crate::repo::{RepoConfig, ReposConfig};
+use crate::repo::{MutantsConfig, RepoConfig, ReposConfig};
 use crate::subsample::{self, SubsampleConfig};
-use crate::task::{CheckResult, GateLevel, GateReport, RepoName};
+use crate::task::{CheckFinding, CheckResult, GateLevel, GateReport, RepoName};
 use serde::Deserialize;
 use std::path::Path;
 use std::process::Command;
@@ -23,6 +23,9 @@ pub struct IntegrationGateConfig {
 /// When `subsample` is `Some` and enabled, test commands are wrapped through
 /// `subsample_test_cmd()` using the ratio for the corresponding gate level.
 /// Gate 1 uses `gate1_ratio`, Gate 2 uses `gate2_ratio`, Gate 3 uses `gate3_ratio`.
+///
+/// Quality gate checks are now configurable per-repo via `repo.checks`:
+/// `["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_audit", "cargo_deny", "cargo_mutants"]`
 pub fn run_gate(
     level: &GateLevel,
     repo: &RepoConfig,
@@ -42,6 +45,8 @@ pub fn run_gate(
                 stdout: "Use run_integration_gate() for Gate 3".into(),
                 stderr: String::new(),
                 exit_code: 0,
+                duration_secs: 0.0,
+                findings: Vec::new(),
             }]
         }
     };
@@ -57,12 +62,12 @@ pub fn run_gate(
     })
 }
 
-/// Run Gate 3: full integration pipeline (meld → loom → synth).
+/// Run Gate 3: full integration pipeline (meld -> loom -> synth).
 ///
 /// Executes the three tools in sequence on a test fixture:
-/// 1. meld fuse: WASM component → fused WASM module
-/// 2. loom optimize: fused WASM → optimized WASM
-/// 3. synth compile: optimized WASM → native binary (ARM ELF)
+/// 1. meld fuse: WASM component -> fused WASM module
+/// 2. loom optimize: fused WASM -> optimized WASM
+/// 3. synth compile: optimized WASM -> native binary (ARM ELF)
 ///
 /// Each step's output feeds into the next. If any step fails,
 /// the gate reports the failure with captured stdout/stderr.
@@ -93,13 +98,7 @@ pub fn run_integration_gate(repos: &ReposConfig, fixture: &Path) -> anyhow::Resu
             return finish_gate(GateLevel::Integration, checks, start);
         }
     } else {
-        checks.push(CheckResult {
-            name: "meld_fuse".into(),
-            passed: false,
-            stdout: String::new(),
-            stderr: "meld repo not configured".into(),
-            exit_code: -1,
-        });
+        checks.push(make_error_result("meld_fuse", "meld repo not configured"));
         return finish_gate(GateLevel::Integration, checks, start);
     }
 
@@ -118,13 +117,10 @@ pub fn run_integration_gate(repos: &ReposConfig, fixture: &Path) -> anyhow::Resu
             return finish_gate(GateLevel::Integration, checks, start);
         }
     } else {
-        checks.push(CheckResult {
-            name: "loom_optimize".into(),
-            passed: false,
-            stdout: String::new(),
-            stderr: "loom repo not configured".into(),
-            exit_code: -1,
-        });
+        checks.push(make_error_result(
+            "loom_optimize",
+            "loom repo not configured",
+        ));
         return finish_gate(GateLevel::Integration, checks, start);
     }
 
@@ -138,13 +134,10 @@ pub fn run_integration_gate(repos: &ReposConfig, fixture: &Path) -> anyhow::Resu
         let result = run_cmd("synth_compile", &cmd, &synth.path)?;
         checks.push(result);
     } else {
-        checks.push(CheckResult {
-            name: "synth_compile".into(),
-            passed: false,
-            stdout: String::new(),
-            stderr: "synth repo not configured".into(),
-            exit_code: -1,
-        });
+        checks.push(make_error_result(
+            "synth_compile",
+            "synth repo not configured",
+        ));
     }
 
     // Verify final output exists
@@ -156,6 +149,8 @@ pub fn run_integration_gate(repos: &ReposConfig, fixture: &Path) -> anyhow::Resu
             stdout: format!("Output ELF: {} bytes", meta.len()),
             stderr: String::new(),
             exit_code: 0,
+            duration_secs: 0.0,
+            findings: Vec::new(),
         });
     }
 
@@ -185,13 +180,10 @@ pub fn run_integration_gate_configured(
         let repo_config = match repos.get(&repo_name) {
             Some(r) => r,
             None => {
-                checks.push(CheckResult {
-                    name: step.label.clone(),
-                    passed: false,
-                    stdout: String::new(),
-                    stderr: format!("{} repo not configured", step.repo),
-                    exit_code: -1,
-                });
+                checks.push(make_error_result(
+                    &step.label,
+                    &format!("{} repo not configured", step.repo),
+                ));
                 return finish_gate(GateLevel::Integration, checks, start);
             }
         };
@@ -236,17 +228,70 @@ fn finish_gate(
     })
 }
 
+/// Build a failing `CheckResult` (exit code -1, `message` in `stderr`) without executing anything.
+fn make_error_result(name: &str, message: &str) -> CheckResult {
+    CheckResult {
+        passed: false,
+        exit_code: -1,
+        name: String::from(name),
+        stdout: String::new(),
+        stderr: String::from(message),
+        duration_secs: 0.0,
+        findings: vec![],
+    }
+}
+
+// ─── Quality checks (configurable per-repo) ─────────────────────────
+
+/// Run Gate 1 quality checks according to the repo's `checks` configuration.
+///
+/// Checks run in `repo.checks` order; an unrecognised name is recorded as a failed check. Recognised values:
+/// - `cargo_fmt`: run `repo.fmt_cmd`
+/// - `cargo_clippy`: run `repo.lint_cmd`
+/// - `cargo_test`: run `repo.test_cmd` (with optional subsampling)
+/// - `cargo_audit`: scan dependencies for known vulnerabilities
+/// - `cargo_deny`: enforce license/ban/duplicate policy
+/// - `cargo_mutants`: mutation testing (opt-in, configurable via `repo.mutants`)
 fn run_quality_checks(
     repo: &RepoConfig,
     subsample: Option<&SubsampleConfig>,
     task_id: Option<i64>,
 ) -> anyhow::Result<Vec<CheckResult>> {
     let test_cmd = maybe_subsample(&repo.test_cmd, subsample, task_id, |c| c.gate1_ratio);
-    let checks = vec![
-        run_cmd("cargo_fmt", &repo.fmt_cmd, &repo.path)?,
-        run_cmd("cargo_clippy", &repo.lint_cmd, &repo.path)?,
-        run_cmd("cargo_test", &test_cmd, &repo.path)?,
-    ];
+    let mut checks = Vec::new();
+
+    for check_name in &repo.checks {
+        let result = match check_name.as_str() {
+            "cargo_fmt" => run_cmd("cargo_fmt", &repo.fmt_cmd, &repo.path)?,
+            "cargo_clippy" => run_cmd("cargo_clippy", &repo.lint_cmd, &repo.path)?,
+            "cargo_test" => run_cmd("cargo_test", &test_cmd, &repo.path)?,
+            "cargo_audit" => run_cargo_audit(&repo.path)?,
+            "cargo_deny" => run_cargo_deny(&repo.path)?,
+            "cargo_mutants" => {
+                let mutants_config = repo.mutants.clone().unwrap_or_default();
+                run_cargo_mutants(&repo.path, &mutants_config)?
+            }
+            other => {
+                tracing::warn!(check = other, "unknown check name, reporting as failed");
+                CheckResult {
+                    name: other.to_string(),
+                    passed: false,
+                    stdout: String::new(),
+                    stderr: format!("unknown check: {other}"),
+                    exit_code: -1,
+                    duration_secs: 0.0,
+                    findings: vec![CheckFinding {
+                        category: "config_error".into(),
+                        severity: "error".into(),
+                        message: format!("Unknown check name: {other}"),
+                        value: None,
+                    }],
+                }
+            }
+        };
+        checks.push(result);
+    }
+
     Ok(checks)
 }
 
@@ -268,13 +313,15 @@ fn run_proof_checks(
     // If the repo has a test command and we're at Gate 2, run tests at gate2_ratio
     // (Gate 2 typically runs at 1.0 for full coverage)
     if repo.verify_cmd.is_none() && repo.proofs_cmd.is_none() {
-        // No proof tooling — gate passes vacuously
+        // No proof tooling -- gate passes vacuously
         checks.push(CheckResult {
             name: "no_proofs_configured".into(),
             passed: true,
             stdout: "No proof commands configured for this repo".into(),
             stderr: String::new(),
             exit_code: 0,
+            duration_secs: 0.0,
+            findings: Vec::new(),
         });
     }
 
@@ -286,6 +333,407 @@ fn run_proof_checks(
     Ok(checks)
 }
 
+// ─── New check runners ──────────────────────────────────────────────
+
+/// Run `cargo audit` to scan for known vulnerabilities in dependencies.
+///
+/// `cargo audit --json` is run first and its stdout is handed to
+/// `parse_audit_findings`; the plain-text `cargo audit` form is only used
+/// when the JSON run printed nothing at all. The check fails when the
+/// command exits unsuccessfully and at least one finding has severity
+/// "error" (CVSS >= 7.0).
+///
+/// Returns `Err` only when the `sh` process cannot be run.
+pub fn run_cargo_audit(cwd: &Path) -> anyhow::Result<CheckResult> {
+    let start = Instant::now();
+    tracing::info!(?cwd, "running cargo audit");
+
+    // `cargo audit` exits non-zero when it reports vulnerabilities. Chaining
+    // the two forms with `||` therefore re-ran the text form after a perfectly
+    // good JSON report and appended its output, so stdout no longer parsed as
+    // JSON and no finding could reach severity "error". Fall back only when
+    // the JSON run produced no output, and otherwise keep its exit status.
+    let script = "out=$(cargo audit --json 2>/dev/null); rc=$?; \
+                  if [ -n \"$out\" ]; then printf '%s\\n' \"$out\"; exit \"$rc\"; fi; \
+                  cargo audit 2>&1";
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(script)
+        .current_dir(cwd)
+        .output()?;
+
+    let duration = start.elapsed().as_secs_f64();
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let exit_code = output.status.code().unwrap_or(-1);
+
+    let findings = parse_audit_findings(&stdout);
+    let high_severity_count = findings.iter().filter(|f| f.severity == "error").count();
+
+    // Fail if any advisory with CVSS >= 7.0 (those are marked severity "error")
+    let passed = output.status.success() || high_severity_count == 0;
+
+    let result = CheckResult {
+        name: "cargo_audit".into(),
+        passed,
+        stdout,
+        stderr,
+        exit_code,
+        duration_secs: duration,
+        findings,
+    };
+
+    if result.passed {
+        tracing::info!("cargo_audit passed");
+    } else {
+        tracing::warn!(
+            high_severity_count,
+            "cargo_audit failed: {} high-severity advisories",
+            high_severity_count
+        );
+    }
+
+    Ok(result)
+}
+
+/// Parse cargo-audit output into structured findings.
+///
+/// Attempts JSON parsing first; falls back to scanning the text for lines
+/// that mention a `RUSTSEC-` id (those are always reported as "warning").
+///
+/// In JSON mode each entry of `vulnerabilities.list` becomes an "advisory"
+/// finding whose `value` is its CVSS score. The `advisory.cvss` field is
+/// accepted either as a number or as a CVSS v3.x vector string (the form
+/// cargo-audit emits), which is scored with `cvss_v3_base_score`. A score of
+/// 7.0 or more yields severity "error"; a lower or unknown score yields
+/// "warning". `vulnerabilities.found`, when present, adds a "summary" finding.
+fn parse_audit_findings(stdout: &str) -> Vec<CheckFinding> {
+    let mut findings = Vec::new();
+
+    let Ok(json) = serde_json::from_str::<serde_json::Value>(stdout) else {
+        // Fallback: scan text for RUSTSEC advisories
+        for line in stdout.lines().filter(|l| l.contains("RUSTSEC-")) {
+            findings.push(CheckFinding {
+                category: "advisory".into(),
+                severity: "warning".into(),
+                message: line.trim().to_string(),
+                value: None,
+            });
+        }
+        return findings;
+    };
+
+    let vulnerabilities = json.get("vulnerabilities");
+    let list = vulnerabilities.and_then(|v| v.get("list")?.as_array());
+
+    for advisory in list.into_iter().flatten().map(|v| v.get("advisory")) {
+        let id = advisory
+            .and_then(|a| a.get("id")?.as_str())
+            .unwrap_or("unknown");
+        let title = advisory
+            .and_then(|a| a.get("title")?.as_str())
+            .unwrap_or("unknown");
+        // A numeric score is used as-is; a vector string is scored here.
+        let cvss = advisory
+            .and_then(|a| a.get("cvss"))
+            .and_then(|c| c.as_f64().or_else(|| cvss_v3_base_score(c.as_str()?)));
+
+        let severity = if cvss.unwrap_or(0.0) >= 7.0 {
+            "error"
+        } else {
+            "warning"
+        };
+
+        findings.push(CheckFinding {
+            category: "advisory".into(),
+            severity: severity.into(),
+            message: format!("{id}: {title}"),
+            value: cvss,
+        });
+    }
+
+    // Report summary counts
+    if let Some(count) = vulnerabilities.and_then(|v| v.get("found")?.as_u64()) {
+        findings.push(CheckFinding {
+            category: "summary".into(),
+            severity: "info".into(),
+            message: format!("{count} total vulnerabilities found"),
+            value: Some(count as f64),
+        });
+    }
+
+    findings
+}
+
+/// Compute the base score of a CVSS v3.x vector string such as
+/// `CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H`.
+///
+/// Returns `None` when the string is not a v3.x vector or when a base metric
+/// is missing or has an unrecognised value.
+fn cvss_v3_base_score(vector: &str) -> Option<f64> {
+    /// Value of the metric called `name`, e.g. "N" for the component `AV:N`.
+    fn metric<'a>(vector: &'a str, name: &str) -> Option<&'a str> {
+        vector
+            .split('/')
+            .find_map(|part| part.strip_prefix(name)?.strip_prefix(':'))
+    }
+
+    if !vector.starts_with("CVSS:3.") {
+        return None;
+    }
+
+    let scope_changed = match metric(vector, "S")? {
+        "U" => false,
+        "C" => true,
+        _ => return None,
+    };
+    let attack_vector: f64 = match metric(vector, "AV")? {
+        "N" => 0.85,
+        "A" => 0.62,
+        "L" => 0.55,
+        "P" => 0.2,
+        _ => return None,
+    };
+    let attack_complexity: f64 = match metric(vector, "AC")? {
+        "L" => 0.77,
+        "H" => 0.44,
+        _ => return None,
+    };
+    // The weight of "privileges required" depends on whether scope changes.
+    let privileges: f64 = match (metric(vector, "PR")?, scope_changed) {
+        ("N", _) => 0.85,
+        ("L", false) => 0.62,
+        ("L", true) => 0.68,
+        ("H", false) => 0.27,
+        ("H", true) => 0.5,
+        _ => return None,
+    };
+    let user_interaction: f64 = match metric(vector, "UI")? {
+        "N" => 0.85,
+        "R" => 0.62,
+        _ => return None,
+    };
+    // Confidentiality, integrity and availability share one weight table.
+    let impact_weight = |name: &str| -> Option<f64> {
+        match metric(vector, name)? {
+            "H" => Some(0.56),
+            "L" => Some(0.22),
+            "N" => Some(0.0),
+            _ => None,
+        }
+    };
+    let confidentiality = impact_weight("C")?;
+    let integrity = impact_weight("I")?;
+    let availability = impact_weight("A")?;
+
+    let iss: f64 = 1.0 - (1.0 - confidentiality) * (1.0 - integrity) * (1.0 - availability);
+    let impact = if scope_changed {
+        7.52 * (iss - 0.029) - 3.25 * (iss - 0.02).powi(15)
+    } else {
+        6.42 * iss
+    };
+    if impact <= 0.0 {
+        return Some(0.0);
+    }
+
+    let exploitability = 8.22 * attack_vector * attack_complexity * privileges * user_interaction;
+    let raw = if scope_changed {
+        (1.08 * (impact + exploitability)).min(10.0)
+    } else {
+        (impact + exploitability).min(10.0)
+    };
+
+    // CVSS "Roundup": the smallest one-decimal number >= raw, computed on
+    // integers so that floating-point noise cannot push a score up a step.
+    let scaled = (raw * 100_000.0).round() as u64;
+    Some(if scaled % 10_000 == 0 {
+        scaled as f64 / 100_000.0
+    } else {
+        (scaled / 10_000 + 1) as f64 / 10.0
+    })
+}
+
+/// Enforce the repo's `cargo deny` policy (licenses, banned crates, duplicates).
+///
+/// A repo without a `deny.toml` directly under `cwd` is treated as "not
+/// configured": the check passes without running anything and carries a
+/// single info finding saying so. Otherwise `cargo deny check` is run through
+/// `sh` with stderr merged into stdout, and the check passes exactly when
+/// that command exits successfully.
+pub fn run_cargo_deny(cwd: &Path) -> anyhow::Result<CheckResult> {
+    let timer = Instant::now();
+    tracing::info!(?cwd, "running cargo deny");
+
+    if cwd.join("deny.toml").exists() {
+        let output = Command::new("sh")
+            .args(["-c", "cargo deny check 2>&1"])
+            .current_dir(cwd)
+            .output()?;
+
+        let elapsed = timer.elapsed().as_secs_f64();
+        let captured_out = String::from_utf8_lossy(&output.stdout).to_string();
+        let exit_code = output.status.code().unwrap_or(-1);
+
+        let check = CheckResult {
+            name: "cargo_deny".into(),
+            passed: output.status.success(),
+            // Findings are parsed before `captured_out` is moved just below.
+            findings: parse_deny_findings(&captured_out),
+            stdout: captured_out,
+            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
+            exit_code,
+            duration_secs: elapsed,
+        };
+
+        if check.passed {
+            tracing::info!("cargo_deny passed");
+        } else {
+            tracing::warn!(exit_code, "cargo_deny failed");
+        }
+
+        return Ok(check);
+    }
+
+    // No policy file: report the skip instead of running the tool.
+    Ok(CheckResult {
+        name: "cargo_deny".into(),
+        passed: true,
+        stdout: "No deny.toml found — cargo-deny not configured for this repo".into(),
+        stderr: String::new(),
+        exit_code: 0,
+        duration_secs: timer.elapsed().as_secs_f64(),
+        findings: vec![CheckFinding {
+            category: "config".into(),
+            severity: "info".into(),
+            message: "deny.toml not found; skipping cargo-deny checks".into(),
+            value: None,
+        }],
+    })
+}
+
+/// Turn cargo-deny output into findings, one per matching line.
+///
+/// A line containing `DENIED` or `error[` becomes a "policy_violation" with
+/// severity "error"; failing that, a line containing `warning[` or `WARN`
+/// becomes a "policy_warning" with severity "warning". All other lines are
+/// ignored. The finding's message is the trimmed line.
+fn parse_deny_findings(output: &str) -> Vec<CheckFinding> {
+    output
+        .lines()
+        .map(str::trim)
+        .filter_map(|entry| {
+            // The error markers are tested first, so a line carrying both
+            // kinds of marker is classified as a violation.
+            let (category, severity) = if entry.contains("DENIED") || entry.contains("error[") {
+                ("policy_violation", "error")
+            } else if entry.contains("warning[") || entry.contains("WARN") {
+                ("policy_warning", "warning")
+            } else {
+                return None;
+            };
+
+            Some(CheckFinding {
+                category: category.into(),
+                severity: severity.into(),
+                message: entry.to_string(),
+                value: None,
+            })
+        })
+        .collect()
+}
+
+/// Run `cargo mutants` for mutation testing on the repository.
+///
+/// When `config.changed_files_only` is true, the `.rs` files that differ from
+/// `HEAD~1` (per `git diff --name-only`) are passed as `--file` filters. If
+/// git succeeds and lists no `.rs` files, the check is skipped and passes.
+/// If git cannot be run or exits unsuccessfully (for example when there is
+/// no `HEAD~1`), no filter is added and the whole tree is tested.
+///
+/// Mutation testing is advisory: surviving mutants and timeouts never fail
+/// the check; they are reported as findings, and a warning is logged when the
+/// survival rate exceeds `config.max_survival_rate`. The check fails only
+/// when cargo-mutants could not do its job (not installed, usage error,
+/// baseline tests failing, or killed by a signal).
+///
+/// Returns `Err` only when the `sh` process cannot be run.
+pub fn run_cargo_mutants(cwd: &Path, config: &MutantsConfig) -> anyhow::Result<CheckResult> {
+    let start = Instant::now();
+    tracing::info!(?cwd, ?config, "running cargo mutants");
+
+    // Build the command
+    let mut cmd_parts = vec!["cargo".to_string(), "mutants".to_string()];
+
+    // Add timeout
+    cmd_parts.push(format!("--timeout={}", config.timeout_secs));
+
+    // If changed_files_only, detect changed files via git
+    if config.changed_files_only {
+        let git_output = Command::new("git")
+            .args(["diff", "--name-only", "HEAD~1", "--diff-filter=ACMR"])
+            .current_dir(cwd)
+            .output();
+
+        match git_output {
+            // Only trust the file list when git succeeded: a failed
+            // `git diff` also prints nothing, which is not "nothing changed".
+            Ok(out) if out.status.success() => {
+                let files: Vec<String> = String::from_utf8_lossy(&out.stdout)
+                    .lines()
+                    .filter(|f| f.ends_with(".rs"))
+                    .map(|f| f.to_string())
+                    .collect();
+
+                if files.is_empty() {
+                    let duration = start.elapsed().as_secs_f64();
+                    return Ok(CheckResult {
+                        name: "cargo_mutants".into(),
+                        passed: true,
+                        stdout: "No .rs files changed — mutation testing skipped".into(),
+                        stderr: String::new(),
+                        exit_code: 0,
+                        duration_secs: duration,
+                        findings: vec![CheckFinding {
+                            category: "mutation_skipped".into(),
+                            severity: "info".into(),
+                            message: "No Rust source files changed".into(),
+                            value: None,
+                        }],
+                    });
+                }
+
+                for file in &files {
+                    // The command line is interpreted by `sh`, so single-quote
+                    // each path (escaping embedded quotes) to keep spaces and
+                    // shell metacharacters in file names literal.
+                    cmd_parts.push(format!("--file='{}'", file.replace('\'', r"'\''")));
+                }
+            }
+            Ok(out) => {
+                tracing::warn!(
+                    exit_code = ?out.status.code(),
+                    "git diff failed; running cargo mutants on the whole tree"
+                );
+            }
+            Err(err) => {
+                tracing::warn!(
+                    error = %err,
+                    "could not run git; running cargo mutants on the whole tree"
+                );
+            }
+        }
+    }
+
+    // Add extra args (passed through to the shell unchanged, as before)
+    cmd_parts.extend(config.extra_args.iter().cloned());
+
+    let cmd_str = cmd_parts.join(" ");
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(&cmd_str)
+        .current_dir(cwd)
+        .output()?;
+
+    let duration = start.elapsed().as_secs_f64();
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let exit_code = output.status.code().unwrap_or(-1);
+
+    let (survival_rate, findings) = parse_mutants_findings(&stdout, config.max_survival_rate);
+
+    // cargo-mutants exit codes: 0 = every mutant caught, 2 = some mutants
+    // missed, 3 = some tests timed out. Those are the advisory outcomes.
+    // Anything else (1 = usage error, 4 = baseline tests failing, 101/127 =
+    // tool or cargo not found, -1 = killed by a signal) means the run itself
+    // did not work and must not be reported as a pass.
+    let passed = matches!(exit_code, 0 | 2 | 3);
+
+    let result = CheckResult {
+        name: "cargo_mutants".into(),
+        passed,
+        stdout,
+        stderr,
+        exit_code,
+        duration_secs: duration,
+        findings,
+    };
+
+    if let Some(rate) = survival_rate
+        && rate > config.max_survival_rate
+    {
+        tracing::warn!(
+            survival_rate = rate,
+            threshold = config.max_survival_rate,
+            "mutation survival rate exceeds threshold"
+        );
+    }
+
+    Ok(result)
+}
+
+/// Parse cargo-mutants output to extract mutation survival rate.
+///
+/// Looks for a summary line containing "mutants tested" and reads the total
+/// from the number in front of that phrase. The number of surviving mutants
+/// is the number in front of "survived" or, failing that, in front of
+/// "missed" (the word cargo-mutants itself uses, as in
+/// `14 mutants tested in 0:08: 2 missed, 9 caught, 3 unviable`). If several
+/// summary lines are present, the last one wins.
+///
+/// Returns `(survival_rate, findings)`, where the rate is a percentage in
+/// `0.0..=100.0`, or `None` when no usable summary line was found. A summary
+/// that gives a total but no survivor count is treated as 0 survivors.
+/// `max_rate` is the percentage above which a "mutation_survival_high"
+/// warning finding is added.
+fn parse_mutants_findings(stdout: &str, max_rate: f64) -> (Option<f64>, Vec<CheckFinding>) {
+    let mut findings = Vec::new();
+    let mut total_mutants: Option<u64> = None;
+    let mut survived: Option<u64> = None;
+
+    for line in stdout.lines() {
+        let trimmed = line.trim();
+
+        // Look for summary lines like "42 mutants tested: 35 killed, 7 survived"
+        if trimmed.contains("mutants tested") {
+            if let Some(total) = extract_number_before(trimmed, "mutants tested") {
+                total_mutants = Some(total);
+            }
+            // Accept both spellings; without "missed" a real cargo-mutants
+            // summary would be read as "0 survived".
+            if let Some(s) = extract_number_before(trimmed, "survived")
+                .or_else(|| extract_number_before(trimmed, "missed"))
+            {
+                survived = Some(s);
+            }
+        }
+    }
+
+    let survival_rate = match (total_mutants, survived) {
+        (Some(total), Some(surv)) if total > 0 => {
+            let rate = (surv as f64 / total as f64) * 100.0;
+            findings.push(CheckFinding {
+                category: "mutation_summary".into(),
+                severity: "info".into(),
+                message: format!("{surv}/{total} mutants survived ({rate:.1}%)"),
+                value: Some(rate),
+            });
+
+            if rate > max_rate {
+                findings.push(CheckFinding {
+                    category: "mutation_survival_high".into(),
+                    severity: "warning".into(),
+                    message: format!(
+                        "Mutation survival rate {rate:.1}% exceeds threshold {max_rate:.1}%"
+                    ),
+                    value: Some(rate),
+                });
+            }
+
+            Some(rate)
+        }
+        // A total with no survivor count: the summary listed no survivors.
+        (Some(total), None) if total > 0 => {
+            findings.push(CheckFinding {
+                category: "mutation_summary".into(),
+                severity: "info".into(),
+                message: format!("{total} mutants tested, 0 survived (0.0%)"),
+                value: Some(0.0),
+            });
+            Some(0.0)
+        }
+        // No summary line, or a total of zero (which would divide by zero).
+        _ => {
+            findings.push(CheckFinding {
+                category: "mutation_parse".into(),
+                severity: "info".into(),
+                message: "Could not parse mutation testing summary".into(),
+                value: None,
+            });
+            None
+        }
+    };
+
+    (survival_rate, findings)
+}
+
+/// Read the unsigned integer that sits immediately in front of `keyword`.
+///
+/// Only the first occurrence of `keyword` in `line` is considered. Whitespace
+/// between the number and the keyword is skipped, then the trailing run of
+/// ASCII digits is parsed; e.g. "35 killed" gives 35 for keyword "killed".
+/// Returns `None` when the keyword is absent, when no digit precedes it, or
+/// when the digits do not fit in a `u64`.
+fn extract_number_before(line: &str, keyword: &str) -> Option<u64> {
+    let keyword_at = line.find(keyword)?;
+    let prefix = line[..keyword_at].trim_end();
+
+    // ASCII digits are one byte each, so the digit count is also the byte
+    // length of the numeric suffix.
+    let digit_count = prefix
+        .chars()
+        .rev()
+        .take_while(char::is_ascii_digit)
+        .count();
+
+    prefix[prefix.len() - digit_count..].parse().ok()
+}
+
+// ─── Command runner ─────────────────────────────────────────────────
+
 /// Optionally wrap a test command through `subsample_test_cmd()`.
 ///
 /// Only applies subsampling when the config is `Some`, enabled, and a task_id
@@ -317,7 +765,184 @@ fn maybe_subsample(
     }
 }
 
+/// Check an implementation against its spec and report the outcome as a
+/// gate check named "spec_compliance".
+///
+/// The issues come from `spec.verify_implementation(actual_files_changed,
+/// repo_root)`. With no issues the check passes and stdout carries a fixed
+/// success message. Otherwise stdout lists every issue, one per line, and the
+/// check fails (exit code 1) only if at least one issue has
+/// `ComplianceSeverity::Error`; issues of other severities are reported but
+/// still pass.
+///
+/// Returns a `CheckResult` that can be added to a gate report.
+pub fn run_spec_compliance_check(
+    spec: &crate::spec::Spec,
+    actual_files_changed: &[String],
+    repo_root: &std::path::Path,
+) -> CheckResult {
+    let problems = spec.verify_implementation(actual_files_changed, repo_root);
+
+    // An empty issue list trivially contains no errors.
+    let failed = problems
+        .iter()
+        .any(|problem| problem.severity == crate::spec::ComplianceSeverity::Error);
+
+    let report: String = if problems.is_empty() {
+        "All spec compliance checks passed".into()
+    } else {
+        problems
+            .iter()
+            .map(|problem| format!("{problem}\n"))
+            .collect()
+    };
+
+    CheckResult {
+        name: "spec_compliance".into(),
+        passed: !failed,
+        stdout: report,
+        stderr: String::new(),
+        exit_code: if failed { 1 } else { 0 },
+        duration_secs: 0.0,
+        findings: Vec::new(),
+    }
+}
+
+/// Produce one gate check per proof obligation in `spec`.
+///
+/// The lower-cased prover name selects the repo command to use: a name
+/// containing "z3" uses `repo.verify_cmd`; otherwise a name containing "rocq"
+/// or "coq" uses `repo.proofs_cmd`; any other prover has no command. When the
+/// selected command is configured it is run via `run_cmd`. When it is not,
+/// the check only verifies that the obligation's proof file exists under
+/// `repo.path`; an obligation with neither a command nor a proof file
+/// contributes no check at all.
+///
+/// Each check is named `spec_proof_<lower-cased prover>`.
+pub fn run_spec_proof_checks(
+    spec: &crate::spec::Spec,
+    repo: &RepoConfig,
+) -> anyhow::Result<Vec<CheckResult>> {
+    let mut results = Vec::new();
+
+    for obligation in &spec.proof_obligations {
+        let prover = obligation.prover.to_lowercase();
+        let name = format!("spec_proof_{}", prover);
+
+        // "z3" takes precedence if a prover name happens to match both groups.
+        let is_z3 = prover.contains("z3");
+        let is_rocq = !is_z3 && (prover.contains("rocq") || prover.contains("coq"));
+
+        let configured_cmd = if is_z3 {
+            repo.verify_cmd.as_ref()
+        } else if is_rocq {
+            repo.proofs_cmd.as_ref()
+        } else {
+            None
+        };
+
+        if let Some(cmd) = configured_cmd {
+            results.push(run_cmd(&name, cmd, &repo.path)?);
+            continue;
+        }
+
+        // Nothing to run: fall back to a file-existence check, if a proof
+        // file was named at all.
+        let Some(proof_file) = obligation.proof_file.as_ref() else {
+            continue;
+        };
+        let exists = repo.path.join(proof_file).exists();
+
+        let stdout = match (is_z3, is_rocq, exists) {
+            (true, _, true) => {
+                format!("Proof file '{}' exists (no verify_cmd to run)", proof_file)
+            }
+            (false, true, true) => {
+                format!("Proof file '{}' exists (no proofs_cmd to run)", proof_file)
+            }
+            (true, _, false) | (false, true, false) => format!(
+                "Proof file '{}' required by spec but does not exist",
+                proof_file
+            ),
+            // Unknown prover
+            (false, false, _) => format!(
+                "Prover '{}': proof file '{}' {}",
+                obligation.prover,
+                proof_file,
+                if exists { "exists" } else { "missing" }
+            ),
+        };
+
+        results.push(CheckResult {
+            name,
+            passed: exists,
+            stdout,
+            stderr: String::new(),
+            exit_code: if exists { 0 } else { 1 },
+            duration_secs: 0.0,
+            findings: Vec::new(),
+        });
+    }
+
+    Ok(results)
+}
+
+/// Run security-focused checks for repos with security-sensitive changes.
+///
+/// Attempts `cargo audit` and `cargo deny check`, in that order, and returns
+/// one result for each. A tool that is not installed produces an
+/// advisory-only (passing) result so the pipeline isn't blocked; a tool that
+/// runs and reports problems still fails its check.
+///
+/// This function currently always returns `Ok`; the `Result` is kept for
+/// interface compatibility.
+pub fn run_security_checks(repo: &RepoConfig) -> anyhow::Result<Vec<CheckResult>> {
+    Ok(vec![
+        // cargo-audit: check for known vulnerabilities
+        run_optional_tool("cargo_audit", "cargo audit", "cargo-audit", &repo.path),
+        // cargo-deny: check licenses and advisories
+        run_optional_tool("cargo_deny", "cargo deny check", "cargo-deny", &repo.path),
+    ])
+}
+
+/// Run `cmd` through `run_cmd`, turning "the tool is not available" into a
+/// passing, advisory-only result named `name`.
+fn run_optional_tool(name: &str, cmd: &str, tool: &str, cwd: &std::path::Path) -> CheckResult {
+    match run_cmd(name, cmd, cwd) {
+        Ok(result) if !tool_unavailable(&result) => result,
+        // Either the shell could not be run at all, or the command ran and
+        // reported that the tool does not exist.
+        _ => CheckResult {
+            name: name.into(),
+            passed: true,
+            stdout: format!("{tool} not installed (advisory only)"),
+            stderr: String::new(),
+            exit_code: 0,
+            duration_secs: 0.0,
+            findings: Vec::new(),
+        },
+    }
+}
+
+/// Whether a failed result looks like a missing tool rather than a finding.
+///
+/// `run_cmd` returns `Ok` even when the command fails, so a missing cargo
+/// subcommand has to be recognised from the result itself: the shell exits
+/// with 127 when `cargo` is not found, and cargo reports an unknown
+/// subcommand on stderr as "no such command" ("no such subcommand" in older
+/// releases).
+fn tool_unavailable(result: &CheckResult) -> bool {
+    !result.passed
+        && (result.exit_code == 127
+            || result.stderr.contains("no such command")
+            || result.stderr.contains("no such subcommand"))
+}
+
 fn run_cmd(name: &str, cmd: &str, cwd: &std::path::Path) -> anyhow::Result<CheckResult> {
+    let check_start = Instant::now();
     tracing::info!(name, cmd, ?cwd, "running gate check");
 
     let output = Command::new("sh")
@@ -326,18 +951,27 @@ fn run_cmd(name: &str, cmd: &str, cwd: &std::path::Path) -> anyhow::Result<Check
         .current_dir(cwd)
         .output()?;
 
+    let duration = check_start.elapsed().as_secs_f64();
+
     let result = CheckResult {
         name: name.to_string(),
         passed: output.status.success(),
         stdout: String::from_utf8_lossy(&output.stdout).to_string(),
         stderr: String::from_utf8_lossy(&output.stderr).to_string(),
         exit_code: output.status.code().unwrap_or(-1),
+        duration_secs: duration,
+        findings: Vec::new(),
     };
 
     if result.passed {
-        tracing::info!(name, "check passed");
+        tracing::info!(name, duration_secs = duration, "check passed");
     } else {
-        tracing::warn!(name, exit_code = result.exit_code, "check failed");
+        tracing::warn!(
+            name,
+            exit_code = result.exit_code,
+            duration_secs = duration,
+            "check failed"
+        );
     }
 
     Ok(result)
@@ -346,6 +980,27 @@ fn run_cmd(name: &str, cmd: &str, cwd: &std::path::Path) -> anyhow::Result<Check
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::repo::default_checks;
+    use std::path::PathBuf;
+
+    /// Shared fixture: a repo named "test" at a path that does not exist,
+    /// with the standard cargo commands, the default check list, and every
+    /// optional setting left unset.
+    fn test_repo_config() -> RepoConfig {
+        RepoConfig {
+            // Identity and location
+            name: RepoName::new("test"),
+            path: PathBuf::from("/tmp/nonexistent"),
+            // Commands
+            build_cmd: "cargo build".into(),
+            test_cmd: "cargo test".into(),
+            fmt_cmd: "cargo fmt --check".into(),
+            lint_cmd: "cargo clippy".into(),
+            // Gate checks
+            checks: default_checks(),
+            // Optional settings, all unset
+            verify_cmd: None,
+            proofs_cmd: None,
+            claude_md: None,
+            safety_target: None,
+            ci: None,
+            mutants: None,
+            trust: None,
+        }
+    }
 
     #[test]
     fn subsampled_cmd_differs_when_ratio_below_one() {
@@ -411,6 +1066,78 @@ mod tests {
         assert_eq!(result, original);
     }
 
+    /// The spec lists exactly the file that changed, so the check passes.
+    #[test]
+    fn spec_compliance_check_passes_when_no_issues() {
+        let design = crate::spec::DesignSpec {
+            affected_files: vec!["src/lib.rs".into()],
+            ..Default::default()
+        };
+        let spec = crate::spec::Spec {
+            design,
+            ..Default::default()
+        };
+        let touched = vec!["src/lib.rs".into()];
+
+        let outcome =
+            run_spec_compliance_check(&spec, &touched, std::path::Path::new("/nonexistent"));
+
+        assert!(outcome.passed);
+        assert_eq!(outcome.name, "spec_compliance");
+    }
+
+    /// A file named by the spec but absent from the change set is reported
+    /// in stdout without failing the check.
+    #[test]
+    fn spec_compliance_check_warns_missing_file() {
+        let design = crate::spec::DesignSpec {
+            affected_files: vec!["src/lib.rs".into(), "src/missing.rs".into()],
+            ..Default::default()
+        };
+        let spec = crate::spec::Spec {
+            design,
+            ..Default::default()
+        };
+        let touched = vec!["src/lib.rs".into()];
+
+        let outcome =
+            run_spec_compliance_check(&spec, &touched, std::path::Path::new("/nonexistent"));
+
+        // Warnings don't fail the check — only errors do
+        assert!(outcome.passed);
+        assert!(outcome.stdout.contains("src/missing.rs"));
+    }
+
+    /// A proof obligation whose proof file cannot be found under the repo
+    /// root fails the check and names the file in stdout.
+    #[test]
+    fn spec_compliance_check_fails_missing_proof() {
+        let obligation = crate::spec::ProofObligation {
+            property: "test".into(),
+            prover: "Z3".into(),
+            proof_file: Some("proofs/missing.z3".into()),
+        };
+        let spec = crate::spec::Spec {
+            proof_obligations: vec![obligation],
+            ..Default::default()
+        };
+        let repo_root = std::path::Path::new("/nonexistent-repo-root");
+
+        let outcome = run_spec_compliance_check(&spec, &[], repo_root);
+
+        assert!(!outcome.passed);
+        assert!(outcome.stdout.contains("missing.z3"));
+    }
+
+    /// A default spec is expected to produce no proof checks.
+    #[test]
+    fn spec_proof_checks_no_obligations() {
+        let spec = crate::spec::Spec::default();
+        // Reuse the shared fixture rather than spelling out a second
+        // `RepoConfig`; like the literal it replaces, it has neither
+        // `verify_cmd` nor `proofs_cmd` set.
+        let repo = test_repo_config();
+        let checks = run_spec_proof_checks(&spec, &repo).unwrap();
+        assert!(checks.is_empty());
+    }
+
     #[test]
     fn different_tasks_get_different_seeds() {
         let config = SubsampleConfig {
@@ -428,4 +1155,261 @@ mod tests {
         assert!(result1.contains("SEED=1"));
         assert!(result2.contains("SEED=2"));
     }
+
+    // ─── Configurable checks tests ──────────────────────────────────────
+
+    /// The fixture's default check list is fmt, clippy, test — in that order.
+    #[test]
+    fn default_repo_has_three_checks() {
+        let config = test_repo_config();
+        let expected = ["cargo_fmt", "cargo_clippy", "cargo_test"];
+
+        assert_eq!(config.checks.len(), expected.len());
+        for (actual, wanted) in config.checks.iter().zip(expected) {
+            assert_eq!(actual, wanted);
+        }
+    }
+
+    /// The check list can be replaced with one that adds the opt-in checks.
+    #[test]
+    fn repo_with_expanded_checks() {
+        let mut config = test_repo_config();
+        config.checks = [
+            "cargo_fmt",
+            "cargo_clippy",
+            "cargo_test",
+            "cargo_audit",
+            "cargo_deny",
+            "cargo_mutants",
+        ]
+        .iter()
+        .map(|name| name.to_string())
+        .collect();
+
+        assert_eq!(config.checks.len(), 6);
+        for opt_in in ["cargo_audit", "cargo_deny", "cargo_mutants"] {
+            assert!(config.checks.contains(&opt_in.to_string()));
+        }
+    }
+
+    // ─── Audit findings parsing tests ───────────────────────────────────
+
+    /// JSON input with one advisory on each side of the CVSS 7.0 cut-off
+    /// yields an "error", a "warning" and a trailing summary finding.
+    #[test]
+    fn parse_audit_json_findings() {
+        let report = r#"{
+            "vulnerabilities": {
+                "found": 2,
+                "list": [
+                    {
+                        "advisory": {
+                            "id": "RUSTSEC-2024-0001",
+                            "title": "Buffer overflow in foo",
+                            "cvss": 9.1
+                        }
+                    },
+                    {
+                        "advisory": {
+                            "id": "RUSTSEC-2024-0002",
+                            "title": "Denial of service in bar",
+                            "cvss": 5.0
+                        }
+                    }
+                ]
+            }
+        }"#;
+
+        let parsed = parse_audit_findings(report);
+        // 2 advisories + 1 summary = 3
+        assert_eq!(parsed.len(), 3);
+        let (high, medium, summary) = (&parsed[0], &parsed[1], &parsed[2]);
+
+        // First advisory: CVSS 9.1 -> error
+        assert_eq!(high.category, "advisory");
+        assert_eq!(high.severity, "error");
+        assert!(high.message.contains("RUSTSEC-2024-0001"));
+        assert_eq!(high.value, Some(9.1));
+
+        // Second advisory: CVSS 5.0 -> warning
+        assert_eq!(medium.severity, "warning");
+        assert_eq!(medium.value, Some(5.0));
+
+        // Summary
+        assert_eq!(summary.category, "summary");
+        assert_eq!(summary.value, Some(2.0));
+    }
+
+    /// Non-JSON input falls back to picking out the lines with a RUSTSEC id.
+    #[test]
+    fn parse_audit_text_findings() {
+        let parsed = parse_audit_findings("Crate: foo\nRUSTSEC-2024-0001: Some issue\nOther line\n");
+
+        assert_eq!(parsed.len(), 1);
+        assert!(parsed[0].message.contains("RUSTSEC-2024-0001"));
+    }
+
+    /// Empty output produces no findings at all.
+    #[test]
+    fn parse_audit_empty_output() {
+        let parsed = parse_audit_findings("");
+        assert_eq!(parsed.len(), 0);
+    }
+
+    // ─── Deny findings parsing tests ────────────────────────────────────
+
+    /// An `error[` line becomes a policy violation and a `warning[` line a
+    /// policy warning, in input order.
+    #[test]
+    fn parse_deny_error_findings() {
+        let parsed = parse_deny_findings(
+            "error[L002]: license is DENIED for crate foo\nwarning[A001]: advisory found\n",
+        );
+
+        assert_eq!(parsed.len(), 2);
+        let (violation, warning) = (&parsed[0], &parsed[1]);
+        assert_eq!(violation.severity, "error");
+        assert_eq!(violation.category, "policy_violation");
+        assert_eq!(warning.severity, "warning");
+        assert_eq!(warning.category, "policy_warning");
+    }
+
+    /// An all-ok summary line contains no markers, so nothing is reported.
+    #[test]
+    fn parse_deny_clean_output() {
+        let parsed = parse_deny_findings("advisories ok, bans ok, licenses ok, sources ok\n");
+        assert_eq!(parsed.len(), 0);
+    }
+
+    // ─── Mutation findings parsing tests ────────────────────────────────
+
+    /// 7 survivors out of 42 is below a 20% threshold: a summary finding is
+    /// produced but no high-survival warning.
+    #[test]
+    fn parse_mutants_survival_rate() {
+        let (rate, findings) =
+            parse_mutants_findings("42 mutants tested: 35 killed, 7 survived\n", 20.0);
+
+        // 7/42 = 16.67%
+        let percent = rate.unwrap();
+        assert!(percent > 16.0 && percent < 17.0);
+
+        assert!(!findings.is_empty());
+        let categories: Vec<&str> = findings.iter().map(|f| f.category.as_str()).collect();
+        // Should have a summary finding
+        assert!(categories.contains(&"mutation_summary"));
+        // 16.67% < 20% threshold, so no warning
+        assert!(!categories.contains(&"mutation_survival_high"));
+    }
+
+    /// 5 survivors out of 10 exceeds a 20% threshold and adds the warning.
+    #[test]
+    fn parse_mutants_high_survival_warns() {
+        let (rate, findings) =
+            parse_mutants_findings("10 mutants tested: 5 killed, 5 survived\n", 20.0);
+
+        // 5/10 = 50%
+        let percent = rate.unwrap();
+        assert!((percent - 50.0).abs() < 0.1);
+
+        // Should have a warning about high survival
+        let warned = findings
+            .iter()
+            .any(|f| f.category == "mutation_survival_high");
+        assert!(warned);
+    }
+
+    /// With no summary line there is no rate, only a "mutation_parse" note.
+    #[test]
+    fn parse_mutants_no_output() {
+        let (rate, findings) = parse_mutants_findings("", 20.0);
+
+        assert_eq!(rate, None);
+        let noted = findings.iter().any(|f| f.category == "mutation_parse");
+        assert!(noted);
+    }
+
+    // ─── Check timing tests ────────────────────────────────────────────
+
+    /// `duration_secs` holds the value it was constructed with.
+    #[test]
+    fn check_result_has_duration() {
+        let check = CheckResult {
+            name: "test".into(),
+            passed: true,
+            exit_code: 0,
+            stdout: String::new(),
+            stderr: String::new(),
+            findings: Vec::new(),
+            duration_secs: 1.5,
+        };
+
+        let drift = (check.duration_secs - 1.5).abs();
+        assert!(drift < f64::EPSILON);
+    }
+
+    /// JSON that omits `duration_secs` and `findings` still deserializes,
+    /// giving a zero duration and an empty findings list.
+    #[test]
+    fn check_result_default_duration_is_zero() {
+        // Verify that serde default gives 0.0
+        let payload = r#"{"name":"test","passed":true,"stdout":"","stderr":"","exit_code":0}"#;
+        let decoded = serde_json::from_str::<CheckResult>(payload).unwrap();
+
+        assert!(decoded.duration_secs.abs() < f64::EPSILON);
+        assert!(decoded.findings.is_empty());
+    }
+
+    // ─── Structured gate results tests ──────────────────────────────────
+
+    /// A gate report keeps per-check timing and findings, and all of it
+    /// survives serialization to JSON.
+    #[test]
+    fn gate_report_is_structured() {
+        let fmt_check = CheckResult {
+            name: "cargo_fmt".into(),
+            passed: true,
+            stdout: "ok".into(),
+            stderr: String::new(),
+            exit_code: 0,
+            duration_secs: 0.5,
+            findings: Vec::new(),
+        };
+        let audit_summary = CheckFinding {
+            category: "summary".into(),
+            severity: "info".into(),
+            message: "0 vulnerabilities".into(),
+            value: Some(0.0),
+        };
+        let audit_check = CheckResult {
+            name: "cargo_audit".into(),
+            passed: true,
+            stdout: "{}".into(),
+            stderr: String::new(),
+            exit_code: 0,
+            duration_secs: 2.3,
+            findings: vec![audit_summary],
+        };
+        let report = GateReport {
+            level: GateLevel::Quality,
+            checks: vec![fmt_check, audit_check],
+            passed: true,
+            duration_secs: 2.8,
+        };
+
+        // Gate results are structured data: each check has timing and findings
+        assert_eq!(report.checks.len(), 2);
+        let (first, second) = (&report.checks[0], &report.checks[1]);
+        assert!((first.duration_secs - 0.5).abs() < f64::EPSILON);
+        assert!((second.duration_secs - 2.3).abs() < f64::EPSILON);
+        assert_eq!(second.findings.len(), 1);
+        assert_eq!(second.findings[0].category, "summary");
+
+        // Can serialize to JSON for dashboard display
+        let encoded = serde_json::to_string(&report).unwrap();
+        for needle in ["duration_secs", "findings", "cargo_audit"] {
+            assert!(encoded.contains(needle));
+        }
+    }
+
+    /// A finding round-trips through JSON with its category and value intact.
+    #[test]
+    fn check_finding_serialization() {
+        let original = CheckFinding {
+            category: "advisory".into(),
+            severity: "error".into(),
+            message: "RUSTSEC-2024-0001: Buffer overflow".into(),
+            value: Some(9.1),
+        };
+
+        let encoded = serde_json::to_string(&original).unwrap();
+        for needle in ["advisory", "9.1"] {
+            assert!(encoded.contains(needle));
+        }
+
+        let decoded: CheckFinding = serde_json::from_str(&encoded).unwrap();
+        assert_eq!(decoded.category, "advisory");
+        assert_eq!(decoded.value, Some(9.1));
+    }
+
+    // ─── Extract number helper tests ────────────────────────────────────
+
+    /// Table of (line, keyword, expected number) cases for the helper.
+    #[test]
+    fn extract_number_before_keyword() {
+        let cases = [
+            ("42 mutants tested", "mutants tested", Some(42)),
+            ("7 survived", "survived", Some(7)),
+            ("no numbers here", "survived", None),
+            ("35 killed, 7 survived", "survived", Some(7)),
+        ];
+
+        for (line, keyword, expected) in cases {
+            assert_eq!(extract_number_before(line, keyword), expected);
+        }
+    }
 }
diff --git a/crates/thrum-core/src/lib.rs b/crates/thrum-core/src/lib.rs
index c24e090..161d127 100644
--- a/crates/thrum-core/src/lib.rs
+++ b/crates/thrum-core/src/lib.rs
@@ -1,20 +1,21 @@
-pub mod a2a;
 pub mod agent;
 pub mod budget;
 pub mod checkpoint;
-pub mod consistency;
+pub mod ci;
 pub mod convergence;
 pub mod coordination;
+pub mod dependency;
 pub mod event;
 pub mod gate;
 pub mod memory;
 pub mod repo;
 pub mod role;
-pub mod safety;
 pub mod session_export;
 pub mod spec;
-pub mod sphinx_needs;
 pub mod subsample;
+pub mod sync;
 pub mod task;
 pub mod telemetry;
 pub mod traceability;
+pub mod trust;
+pub mod verification;
diff --git a/crates/thrum-core/src/repo.rs b/crates/thrum-core/src/repo.rs
index 409b316..f592637 100644
--- a/crates/thrum-core/src/repo.rs
+++ b/crates/thrum-core/src/repo.rs
@@ -1,7 +1,50 @@
-use crate::task::{AsilLevel, RepoName};
-use serde::Deserialize;
+use crate::sync::SyncConfig;
+use crate::task::RepoName;
+use crate::trust::TrustConfig;
+use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
+/// Automotive Safety Integrity Level (ISO 26262); derived ordering ranks `Qm` lowest, `AsilD` highest.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub enum AsilLevel {
+    Qm,
+    AsilA,
+    AsilB,
+    AsilC,
+    AsilD,
+}
+
+impl std::fmt::Display for AsilLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Qm => "QM",
+            Self::AsilA => "ASIL A",
+            Self::AsilB => "ASIL B",
+            Self::AsilC => "ASIL C",
+            Self::AsilD => "ASIL D",
+        })
+    }
+}
+
+/// Gate checks run for a repository whose config does not list `checks` explicitly.
+///
+/// These are `["cargo_fmt", "cargo_clippy", "cargo_test"]` — the original
+/// minimum viable harness. Repos can opt in to additional checks:
+/// - `cargo_audit`: scan for known vulnerabilities (CVSS >= 7.0 fails gate)
+/// - `cargo_deny`: license policy, banned crates, duplicate detection
+/// - `cargo_mutants`: mutation testing on changed files (opt-in)
+pub const DEFAULT_CHECKS: &[&str] = &["cargo_fmt", "cargo_clippy", "cargo_test"];
+
+/// All recognised check names for validation: the three defaults plus the opt-in checks.
+pub const ALL_KNOWN_CHECKS: &[&str] = &[
+    "cargo_fmt",
+    "cargo_clippy",
+    "cargo_test",
+    "cargo_audit",
+    "cargo_deny",
+    "cargo_mutants",
+];
+
 /// Configuration for a managed repository.
 #[derive(Debug, Clone, Deserialize)]
 pub struct RepoConfig {
@@ -19,6 +62,126 @@ pub struct RepoConfig {
     pub claude_md: Option<PathBuf>,
     /// Functional safety target for this tool.
     pub safety_target: Option<AsilLevel>,
+    /// CI integration configuration (opt-in).
+    #[serde(default)]
+    pub ci: Option<CIConfig>,
+    /// Which gate checks to run for Gate 1 (Quality).
+    ///
+    /// Defaults to `["cargo_fmt", "cargo_clippy", "cargo_test"]`.
+    /// Add `"cargo_audit"`, `"cargo_deny"`, `"cargo_mutants"` to expand the harness.
+    #[serde(default = "default_checks")]
+    pub checks: Vec<String>,
+    /// Configuration for cargo-mutants (mutation testing).
+    /// Only used when `"cargo_mutants"` is in the `checks` list.
+    #[serde(default)]
+    pub mutants: Option<MutantsConfig>,
+    /// Trust boundary configuration for file-level risk classification.
+    #[serde(default)]
+    pub trust: Option<TrustConfig>,
+}
+
+pub fn default_checks() -> Vec<String> {
+    DEFAULT_CHECKS.iter().copied().map(String::from).collect() // owned copy of the defaults
+}
+
+/// Configuration for cargo-mutants (mutation testing); every field is optional in TOML.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct MutantsConfig {
+    /// Only test mutations in files changed by the current task (default: true).
+    #[serde(default = "default_changed_only")]
+    pub changed_files_only: bool,
+    /// Maximum mutation survival rate before the check warns (percentage, 0-100).
+    /// Default: 20.0 (warn if > 20% of mutations survive).
+    #[serde(default = "default_max_survival_rate")]
+    pub max_survival_rate: f64,
+    /// Extra arguments to pass to cargo-mutants (default: none).
+    #[serde(default)]
+    pub extra_args: Vec<String>,
+    /// Timeout per mutant in seconds (default: 60).
+    #[serde(default = "default_mutant_timeout")]
+    pub timeout_secs: u64,
+}
+
+fn default_changed_only() -> bool {
+    true // serde default for `MutantsConfig::changed_files_only`
+}
+
+fn default_max_survival_rate() -> f64 {
+    20.0 // percent; serde default for `MutantsConfig::max_survival_rate`
+}
+
+fn default_mutant_timeout() -> u64 {
+    60 // seconds; serde default for `MutantsConfig::timeout_secs`
+}
+
+impl Default for MutantsConfig {
+    fn default() -> Self {
+        Self {
+            extra_args: vec![],
+            timeout_secs: default_mutant_timeout(),
+            max_survival_rate: default_max_survival_rate(),
+            changed_files_only: default_changed_only(),
+        }
+    }
+}
+
+/// CI integration configuration for a repository.
+///
+/// When present, the post-approval pipeline will push the branch,
+/// create a PR, and poll CI status instead of merging locally.
+#[derive(Debug, Clone, Deserialize)]
+pub struct CIConfig {
+    /// Whether CI integration is enabled (default: true).
+    #[serde(default = "default_ci_enabled")]
+    pub enabled: bool,
+    /// Polling interval in seconds (default: 60).
+    #[serde(default = "default_ci_poll_interval")]
+    pub poll_interval_secs: u64,
+    /// Maximum number of ci_fixer retries before escalating (default: 3).
+    #[serde(default = "default_max_ci_retries")]
+    pub max_ci_retries: u32,
+    /// Whether to auto-merge on green CI (default: true).
+    #[serde(default = "default_auto_merge")]
+    pub auto_merge: bool,
+    /// Merge strategy: "squash", "merge", "rebase" (default: "squash"); kept as a plain string here.
+    #[serde(default = "default_merge_strategy")]
+    pub merge_strategy: String,
+    /// Remote sync configuration (falls back to `SyncConfig::default()` when omitted).
+    #[serde(default)]
+    pub sync: SyncConfig,
+}
+
+fn default_ci_enabled() -> bool {
+    true // serde default for `CIConfig::enabled`
+}
+
+fn default_ci_poll_interval() -> u64 {
+    60 // seconds; serde default for `CIConfig::poll_interval_secs`
+}
+
+fn default_max_ci_retries() -> u32 {
+    3 // serde default for `CIConfig::max_ci_retries`
+}
+
+fn default_auto_merge() -> bool {
+    true // serde default for `CIConfig::auto_merge`
+}
+
+fn default_merge_strategy() -> String {
+    "squash".into() // serde default for `CIConfig::merge_strategy`
+}
+
+impl Default for CIConfig {
+    fn default() -> Self {
+        Self {
+            enabled: default_ci_enabled(),
+            poll_interval_secs: default_ci_poll_interval(),
+            max_ci_retries: default_max_ci_retries(),
+            auto_merge: default_auto_merge(),
+            merge_strategy: default_merge_strategy(),
+            sync: SyncConfig::default(),
+        }
+    }
 }
 
 impl RepoConfig {
@@ -70,9 +233,22 @@ mod tests {
             proofs_cmd: None,
             claude_md: None,
             safety_target: None,
+            ci: None,
+            checks: default_checks(),
+            mutants: None,
+            trust: None,
         }
     }
 
+    #[test]
+    fn ci_config_includes_sync_defaults() {
+        let sync = CIConfig::default().sync;
+        assert!(sync.enabled);
+        assert!(sync.auto_rebase);
+        assert!(sync.dispatch_rebase_agent);
+        assert_eq!(sync.sync_strategy, crate::sync::SyncStrategy::Eager);
+    }
+
     #[test]
     fn with_work_dir_overrides_path_only() {
         let config = test_repo_config();
@@ -86,4 +262,262 @@ mod tests {
         assert_eq!(overridden.lint_cmd, config.lint_cmd);
         assert_eq!(overridden.fmt_cmd, config.fmt_cmd);
     }
+
+    #[test]
+    fn ci_config_default_values() {
+        let cfg = CIConfig::default();
+        assert!(cfg.enabled, "CI should be enabled by default");
+        assert!(cfg.auto_merge, "auto_merge should be true by default");
+        assert_eq!(cfg.max_ci_retries, 3, "default max retries should be 3");
+        assert_eq!(
+            cfg.poll_interval_secs, 60,
+            "default poll interval should be 60s"
+        );
+        assert_eq!(
+            cfg.merge_strategy, "squash",
+            "default merge strategy should be squash"
+        );
+    }
+
+    #[test]
+    fn ci_config_from_toml() {
+        let document = r#"
+            enabled = true
+            poll_interval_secs = 120
+            max_ci_retries = 5
+            auto_merge = false
+            merge_strategy = "rebase"
+        "#;
+        let parsed = toml::from_str::<CIConfig>(document).unwrap();
+        assert!(parsed.enabled);
+        assert!(!parsed.auto_merge);
+        assert_eq!(parsed.poll_interval_secs, 120);
+        assert_eq!(parsed.max_ci_retries, 5);
+        assert_eq!(parsed.merge_strategy, "rebase");
+    }
+
+    #[test]
+    fn ci_config_from_toml_with_defaults() {
+        let document = r#"
+            poll_interval_secs = 30
+        "#;
+        let parsed = toml::from_str::<CIConfig>(document).unwrap();
+        assert_eq!(parsed.poll_interval_secs, 30);
+        assert!(parsed.enabled);
+        assert!(parsed.auto_merge);
+        assert_eq!(parsed.max_ci_retries, 3);
+        assert_eq!(parsed.merge_strategy, "squash");
+    }
+
+    #[test]
+    fn repo_config_ci_opt_in() {
+        // The fixture leaves `ci` unset, so nothing may report as enabled.
+        let active = test_repo_config().ci.is_some_and(|ci| ci.enabled);
+        assert!(!active, "CI should be opt-in (disabled when ci=None)");
+    }
+
+    #[test]
+    fn repo_config_with_ci_enabled() {
+        let mut repo = test_repo_config();
+        // Simulate a CI section that is present and relies entirely on defaults.
+        repo.ci = Some(CIConfig::default());
+        assert!(
+            repo.ci.is_some_and(|section| section.enabled),
+            "CI should be enabled when section is present with defaults"
+        );
+    }
+
+    #[test]
+    fn repo_config_with_ci_disabled() {
+        let mut repo = test_repo_config();
+        repo.ci = Some(CIConfig {
+            enabled: false,
+            ..CIConfig::default()
+        });
+        let active = repo.ci.is_some_and(|section| section.enabled);
+        assert!(!active, "CI should be disabled when enabled=false");
+    }
+
+    #[test]
+    fn with_work_dir_preserves_ci_config() {
+        let mut repo = test_repo_config();
+        repo.ci = Some(CIConfig {
+            poll_interval_secs: 30,
+            max_ci_retries: 5,
+            ..CIConfig::default()
+        });
+        let relocated = repo.with_work_dir(PathBuf::from("/worktree"));
+        assert!(
+            relocated.ci.is_some(),
+            "CI config should be preserved in worktree"
+        );
+        let kept = relocated.ci.unwrap();
+        assert_eq!(kept.poll_interval_secs, 30);
+        assert_eq!(kept.max_ci_retries, 5);
+    }
+
+    // ─── Gate checks configurability tests ──────────────────────────────
+
+    #[test]
+    fn default_checks_are_fmt_clippy_test() {
+        // The fixture is built with `default_checks()`, i.e. the three baseline checks.
+        assert_eq!(
+            test_repo_config().checks,
+            ["cargo_fmt", "cargo_clippy", "cargo_test"]
+        );
+    }
+
+    #[test]
+    fn expanded_checks_from_toml() {
+        let document = r#"
+            name = "myrepo"
+            path = "/tmp/myrepo"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+            checks = ["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_audit", "cargo_deny"]
+        "#;
+        let repo = toml::from_str::<RepoConfig>(document).unwrap();
+        assert_eq!(repo.checks.len(), 5);
+        assert!(repo.checks.iter().any(|name| name == "cargo_audit"));
+        assert!(repo.checks.iter().any(|name| name == "cargo_deny"));
+    }
+
+    #[test]
+    fn mutants_config_from_toml() {
+        let document = r#"
+            name = "myrepo"
+            path = "/tmp/myrepo"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+            checks = ["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_mutants"]
+
+            [mutants]
+            changed_files_only = true
+            max_survival_rate = 15.0
+            timeout_secs = 120
+        "#;
+        let repo = toml::from_str::<RepoConfig>(document).unwrap();
+        assert!(repo.checks.iter().any(|name| name == "cargo_mutants"));
+        let section = repo.mutants.unwrap();
+        assert!(section.changed_files_only);
+        assert_eq!(section.timeout_secs, 120);
+        assert!((section.max_survival_rate - 15.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn mutants_config_defaults() {
+        let defaults = MutantsConfig::default();
+        assert!(defaults.changed_files_only);
+        assert!(defaults.extra_args.is_empty());
+        assert_eq!(defaults.timeout_secs, 60);
+        assert!((defaults.max_survival_rate - 20.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn checks_default_when_omitted_from_toml() {
+        let document = r#"
+            name = "myrepo"
+            path = "/tmp/myrepo"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+        "#;
+        let repo = toml::from_str::<RepoConfig>(document).unwrap();
+        assert_eq!(
+            repo.checks,
+            ["cargo_fmt", "cargo_clippy", "cargo_test"],
+            "omitted checks should default to the basic three"
+        );
+    }
+
+    #[test]
+    fn with_work_dir_preserves_checks() {
+        let mut repo = test_repo_config();
+        // Use a list longer than the defaults so that losing it during the
+        // work-dir override would be visible.
+        repo.checks = ["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_audit"]
+            .iter()
+            .map(|name| String::from(*name))
+            .collect();
+        let relocated = repo.with_work_dir(PathBuf::from("/worktree"));
+        assert_eq!(relocated.checks.len(), 4);
+        assert!(relocated.checks.iter().any(|name| name == "cargo_audit"));
+    }
+
+    // ─── Trust boundary tests ──────────────────────────────────────────
+
+    #[test]
+    fn repo_config_trust_opt_in() {
+        // The fixture never sets `trust`, so it must come back as `None`.
+        assert!(
+            test_repo_config().trust.is_none(),
+            "Trust should be opt-in (None when not specified)"
+        );
+    }
+
+    #[test]
+    fn repo_config_trust_from_toml() {
+        let document = r#"
+            name = "my-project"
+            path = "/tmp/project"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+
+            [trust]
+            high_risk = ["src/crypto/**", "Cargo.lock"]
+            security_sensitive = ["Cargo.toml", "build.rs"]
+            auto_ok = ["docs/**", "*.md"]
+        "#;
+        let repo = toml::from_str::<RepoConfig>(document).unwrap();
+        let rules = repo.trust.expect("trust config should parse");
+        assert_eq!(rules.high_risk.len(), 2);
+        assert_eq!(rules.security_sensitive.len(), 2);
+        assert_eq!(rules.auto_ok.len(), 2);
+    }
+
+    #[test]
+    fn repo_config_trust_partial_from_toml() {
+        let document = r#"
+            name = "myrepo"
+            path = "/tmp/myrepo"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+
+            [trust]
+            high_risk = ["*.lock"]
+        "#;
+        let repo = toml::from_str::<RepoConfig>(document).unwrap();
+        let rules = repo.trust.expect("trust config should parse");
+        assert_eq!(rules.high_risk.len(), 1);
+        assert!(rules.auto_ok.is_empty());
+        assert!(rules.security_sensitive.is_empty());
+    }
+
+    #[test]
+    fn with_work_dir_preserves_trust_config() {
+        let mut repo = test_repo_config();
+        repo.trust = Some(TrustConfig {
+            high_risk: vec!["src/crypto/**".into()],
+            security_sensitive: vec!["Cargo.toml".into()],
+            auto_ok: vec!["docs/**".into()],
+        });
+        let relocated = repo.with_work_dir(PathBuf::from("/worktree"));
+        assert!(
+            relocated.trust.is_some(),
+            "Trust config should be preserved in worktree"
+        );
+        let rules = relocated.trust.unwrap();
+        assert_eq!(rules.high_risk.len(), 1);
+        assert_eq!(rules.security_sensitive.len(), 1);
+        assert_eq!(rules.auto_ok.len(), 1);
+    }
 }
diff --git a/crates/thrum-core/src/role.rs b/crates/thrum-core/src/role.rs
index 18578bf..bb2e152 100644
--- a/crates/thrum-core/src/role.rs
+++ b/crates/thrum-core/src/role.rs
@@ -1,5 +1,36 @@
 use serde::Deserialize;
 use std::collections::HashMap;
+use std::fmt;
+
+/// Strategy for recovering from agent timeouts, set per role via `timeout_recovery`.
+///
+/// Different roles benefit from different recovery strategies:
+/// - Implementation timeouts: retry with session continuation to resume partial work
+/// - Review timeouts: skip review rather than blocking the pipeline
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub enum TimeoutRecoveryStrategy {
+    /// Treat timeout as a failure (existing behavior, the default). Task transitions to failed state.
+    #[default]
+    Fail,
+    /// Retry with session continuation. For implementers, resumes from the last checkpoint.
+    Retry,
+    /// Skip the timed-out step. For reviewers, auto-approves with a "review-skipped-timeout" note.
+    Skip,
+    /// Double the timeout and retry once. Falls back to `Fail` if the extended timeout also expires.
+    Extend,
+}
+
+impl fmt::Display for TimeoutRecoveryStrategy {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Self::Fail => "fail",
+            Self::Retry => "retry",
+            Self::Skip => "skip",
+            Self::Extend => "extend",
+        })
+    }
+}
 
 /// Configuration for an agent role (implementer, reviewer, planner, etc.).
 #[derive(Debug, Clone, Deserialize)]
@@ -12,6 +43,9 @@ pub struct AgentRole {
     pub budget_usd: Option<f64>,
     /// Timeout in seconds for this role's invocations.
     pub timeout_secs: Option<u64>,
+    /// What to do when this role's invocation times out.
+    #[serde(default)]
+    pub timeout_recovery: TimeoutRecoveryStrategy,
 }
 
 /// Declarative backend configuration loaded from pipeline.toml `[[backends]]`.
@@ -80,6 +114,7 @@ impl RolesConfig {
             prompt_template: "agents/implementer.md".into(),
             budget_usd: Some(6.0),
             timeout_secs: Some(600),
+            timeout_recovery: TimeoutRecoveryStrategy::Retry,
         })
     }
 
@@ -90,6 +125,7 @@ impl RolesConfig {
             prompt_template: "agents/reviewer.md".into(),
             budget_usd: Some(1.0),
             timeout_secs: Some(300),
+            timeout_recovery: TimeoutRecoveryStrategy::Skip,
         })
     }
 
@@ -100,6 +136,18 @@ impl RolesConfig {
             prompt_template: "agents/planner.md".into(),
             budget_usd: Some(1.0),
             timeout_secs: Some(300),
+            timeout_recovery: TimeoutRecoveryStrategy::default(),
+        })
+    }
+
+    /// Get the ci_fixer role, falling back to defaults.
+    pub fn ci_fixer(&self) -> AgentRole {
+        self.roles.get("ci_fixer").cloned().unwrap_or(AgentRole {
+            backend: "opus".into(),
+            prompt_template: "agents/ci_fixer.md".into(),
+            budget_usd: Some(3.0),
+            timeout_secs: Some(600),
+            timeout_recovery: TimeoutRecoveryStrategy::Retry,
         })
     }
 }
@@ -114,6 +162,7 @@ impl Default for RolesConfig {
                 prompt_template: "agents/implementer.md".into(),
                 budget_usd: Some(6.0),
                 timeout_secs: Some(600),
+                timeout_recovery: TimeoutRecoveryStrategy::Retry,
             },
         );
         roles.insert(
@@ -123,6 +172,7 @@ impl Default for RolesConfig {
                 prompt_template: "agents/reviewer.md".into(),
                 budget_usd: Some(1.0),
                 timeout_secs: Some(300),
+                timeout_recovery: TimeoutRecoveryStrategy::Skip,
             },
         );
         roles.insert(
@@ -132,6 +182,7 @@ impl Default for RolesConfig {
                 prompt_template: "agents/planner.md".into(),
                 budget_usd: Some(1.0),
                 timeout_secs: Some(300),
+                timeout_recovery: TimeoutRecoveryStrategy::default(),
             },
         );
         Self { roles }
@@ -150,6 +201,15 @@ mod tests {
         assert!(config.get("planner").is_some());
         assert_eq!(config.implementer().backend, "opus");
         assert_eq!(config.reviewer().backend, "sonnet");
+        // Verify default timeout recovery strategies
+        assert_eq!(
+            config.implementer().timeout_recovery,
+            TimeoutRecoveryStrategy::Retry
+        );
+        assert_eq!(
+            config.reviewer().timeout_recovery,
+            TimeoutRecoveryStrategy::Skip
+        );
     }
 
     #[test]
@@ -162,14 +222,133 @@ mod tests {
                 prompt_template: "agents/fast_impl.md".into(),
                 budget_usd: Some(0.5),
                 timeout_secs: Some(120),
+                timeout_recovery: TimeoutRecoveryStrategy::Extend,
             },
         );
         let config = RolesConfig { roles };
         assert_eq!(config.implementer().backend, "haiku");
+        assert_eq!(
+            config.implementer().timeout_recovery,
+            TimeoutRecoveryStrategy::Extend
+        );
         // Reviewer falls back to default since not configured
         assert_eq!(config.reviewer().backend, "sonnet");
     }
 
+    #[test]
+    fn timeout_recovery_strategy_display() {
+        assert_eq!(format!("{}", TimeoutRecoveryStrategy::Fail), "fail");
+        assert_eq!(format!("{}", TimeoutRecoveryStrategy::Retry), "retry");
+        assert_eq!(format!("{}", TimeoutRecoveryStrategy::Skip), "skip");
+        assert_eq!(format!("{}", TimeoutRecoveryStrategy::Extend), "extend");
+    }
+
+    #[test]
+    fn timeout_recovery_strategy_default() {
+        // A strategy that is never specified must keep the existing
+        // fail-on-timeout behaviour.
+        let fallback = TimeoutRecoveryStrategy::default();
+        assert_eq!(fallback, TimeoutRecoveryStrategy::Fail);
+    }
+
+    #[test]
+    fn timeout_recovery_strategy_deserialize() {
+        // Minimal document shape: a single `strategy` key.
+        #[derive(Deserialize)]
+        struct Wrapper {
+            strategy: TimeoutRecoveryStrategy,
+        }
+
+        // Parse a one-key TOML document and return the decoded strategy.
+        let parse = |doc: &str| -> TimeoutRecoveryStrategy {
+            toml::from_str::<Wrapper>(doc).unwrap().strategy
+        };
+
+        // Each variant is written in lowercase in the config file.
+        assert_eq!(parse(r#"strategy = "skip""#), TimeoutRecoveryStrategy::Skip);
+        assert_eq!(parse(r#"strategy = "retry""#), TimeoutRecoveryStrategy::Retry);
+        assert_eq!(parse(r#"strategy = "extend""#), TimeoutRecoveryStrategy::Extend);
+        assert_eq!(parse(r#"strategy = "fail""#), TimeoutRecoveryStrategy::Fail);
+    }
+
+    #[test]
+    fn timeout_recovery_from_pipeline_toml_format() {
+        // Four roles written as `[roles.<name>]` tables (the pipeline.toml layout), each
+        // with an explicit `timeout_recovery`, to check the key is read for every role.
+        let toml_str = r#"
+            [roles.implementer]
+            backend = "opus"
+            prompt_template = "agents/implementer.md"
+            budget_usd = 6.0
+            timeout_secs = 2400
+            timeout_recovery = "retry"
+
+            [roles.reviewer]
+            backend = "sonnet"
+            prompt_template = "agents/reviewer.md"
+            budget_usd = 1.0
+            timeout_secs = 300
+            timeout_recovery = "skip"
+
+            [roles.planner]
+            backend = "opus"
+            prompt_template = "agents/planner.md"
+            budget_usd = 1.0
+            timeout_secs = 300
+            timeout_recovery = "fail"
+
+            [roles.ci_fixer]
+            backend = "opus"
+            prompt_template = "agents/ci_fixer.md"
+            budget_usd = 3.0
+            timeout_secs = 600
+            timeout_recovery = "retry"
+        "#;
+
+        let config: RolesConfig = toml::from_str(toml_str).unwrap();
+        assert_eq!(
+            config.get("implementer").unwrap().timeout_recovery,
+            TimeoutRecoveryStrategy::Retry
+        );
+        assert_eq!(
+            config.get("reviewer").unwrap().timeout_recovery,
+            TimeoutRecoveryStrategy::Skip
+        );
+        assert_eq!(
+            config.get("planner").unwrap().timeout_recovery,
+            TimeoutRecoveryStrategy::Fail
+        );
+        assert_eq!(
+            config.get("ci_fixer").unwrap().timeout_recovery,
+            TimeoutRecoveryStrategy::Retry
+        );
+    }
+
+    #[test]
+    fn timeout_recovery_defaults_when_omitted_from_toml() {
+        // A role that is configured but omits `timeout_recovery` gets the serde
+        // default (`Fail`), not a role-specific value. The fallbacks built into
+        // `implementer()` (Retry) and `reviewer()` (Skip) only apply when the
+        // role has no entry in the config at all.
+        let toml_str = r#"
+            [roles.implementer]
+            backend = "opus"
+            prompt_template = "agents/implementer.md"
+        "#;
+
+        let config: RolesConfig = toml::from_str(toml_str).unwrap();
+        // Configured role with the key omitted: plain serde default.
+        assert_eq!(
+            config.get("implementer").unwrap().timeout_recovery,
+            TimeoutRecoveryStrategy::Fail
+        );
+        // Role absent from the map: the accessor's built-in fallback is used.
+        assert_eq!(
+            config.reviewer().timeout_recovery,
+            TimeoutRecoveryStrategy::Skip
+        );
+    }
+
     #[test]
     fn backend_config_deserialize() {
         let toml_str = r#"
diff --git a/crates/thrum-core/src/safety.rs b/crates/thrum-core/src/safety.rs
deleted file mode 100644
index 40a518d..0000000
--- a/crates/thrum-core/src/safety.rs
+++ /dev/null
@@ -1,590 +0,0 @@
-//! Multi-standard functional safety classification.
-//!
-//! Supports:
-//! - ISO 26262 (Automotive): ASIL levels, TCL via TI×TD matrix
-//! - IEC 62304 (Medical): Software safety classes A/B/C
-//! - DO-178C (Avionics): Design Assurance Levels A-E
-//! - IEC 61508 (Industrial): SIL 1-4
-//!
-//! Also handles OSS/SOUP qualification tracking per each standard.
-
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize};
-use std::path::PathBuf;
-
-// ─── Tool Confidence Level (ISO 26262 Part 8, Clause 11) ───────────────
-
-/// Tool Impact classification.
-/// TI1: The tool cannot introduce or fail to detect errors in a safety-related item.
-/// TI2: The tool can introduce or fail to detect errors.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ToolImpact {
-    /// Tool has no impact on safety-related outputs.
-    Ti1,
-    /// Tool can introduce or fail to detect errors in safety items.
-    Ti2,
-}
-
-/// Tool error Detection capability.
-/// How likely are tool-introduced errors to be caught by downstream activities?
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ToolDetection {
-    /// High confidence: strong measures exist to detect tool errors
-    /// (e.g., Z3 verification, formal proofs, independent back-to-back testing).
-    Td1,
-    /// Medium confidence: some measures exist
-    /// (e.g., comprehensive test suites, code review).
-    Td2,
-    /// Low confidence: weak or no measures to detect tool errors.
-    Td3,
-}
-
-/// Tool Confidence Level (result of TI × TD classification).
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub enum Tcl {
-    /// Low confidence needed — minimal qualification effort.
-    Tcl1,
-    /// Medium confidence — increased qualification effort.
-    Tcl2,
-    /// High confidence — maximum qualification effort.
-    Tcl3,
-}
-
-impl std::fmt::Display for Tcl {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Tcl::Tcl1 => write!(f, "TCL1"),
-            Tcl::Tcl2 => write!(f, "TCL2"),
-            Tcl::Tcl3 => write!(f, "TCL3"),
-        }
-    }
-}
-
-/// Determine TCL from TI and TD per ISO 26262 Part 8, Table 4.
-///
-/// ```text
-///        │ TD1   │ TD2   │ TD3
-/// ───────┼───────┼───────┼──────
-///  TI1   │ TCL1  │ TCL1  │ TCL1
-///  TI2   │ TCL1  │ TCL2  │ TCL3
-/// ```
-pub fn determine_tcl(ti: ToolImpact, td: ToolDetection) -> Tcl {
-    match (ti, td) {
-        (ToolImpact::Ti1, _) => Tcl::Tcl1,
-        (ToolImpact::Ti2, ToolDetection::Td1) => Tcl::Tcl1,
-        (ToolImpact::Ti2, ToolDetection::Td2) => Tcl::Tcl2,
-        (ToolImpact::Ti2, ToolDetection::Td3) => Tcl::Tcl3,
-    }
-}
-
-/// Qualification methods required per TCL level (ISO 26262 Part 8, Table 5).
-pub fn qualification_methods(tcl: Tcl) -> Vec<&'static str> {
-    match tcl {
-        Tcl::Tcl1 => vec![],
-        Tcl::Tcl2 => vec![
-            "1a: Increased confidence from use",
-            "1b: Evaluation of the tool development process",
-            "1c: Validation of the software tool",
-            "1d: Development in accordance with a safety standard",
-        ],
-        Tcl::Tcl3 => vec![
-            "1b: Evaluation of the tool development process",
-            "1c: Validation of the software tool",
-            "1d: Development in accordance with a safety standard",
-        ],
-    }
-}
-
-// ─── Tool Qualification Record ─────────────────────────────────────────
-
-/// Complete tool qualification record per ISO 26262 Part 8.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ToolQualification {
-    pub tool_name: String,
-    pub tool_version: String,
-    pub ti: ToolImpact,
-    pub td: ToolDetection,
-    pub tcl: Tcl,
-    pub target_asil: Option<AsilLevel>,
-    pub qualification_methods: Vec<String>,
-    pub evidence: Vec<QualificationEvidence>,
-    pub oss_info: Option<OssQualification>,
-    pub use_cases: Vec<ToolUseCase>,
-}
-
-/// Describes how a tool is used in the safety lifecycle.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ToolUseCase {
-    pub description: String,
-    /// Which phase of the V-model this use case belongs to.
-    pub lifecycle_phase: String,
-    /// What safety-related output the tool produces.
-    pub output_description: String,
-    /// How errors in this output would be detected.
-    pub detection_measures: Vec<String>,
-}
-
-/// Evidence supporting tool qualification.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct QualificationEvidence {
-    pub method: String,
-    pub description: String,
-    pub artifact_path: Option<PathBuf>,
-    pub status: EvidenceStatus,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub enum EvidenceStatus {
-    NotStarted,
-    InProgress,
-    Complete,
-    NotApplicable,
-}
-
-// ─── ISO 26262 ASIL ────────────────────────────────────────────────────
-
-/// Automotive Safety Integrity Level (ISO 26262).
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub enum AsilLevel {
-    Qm,
-    AsilA,
-    AsilB,
-    AsilC,
-    AsilD,
-}
-
-impl std::fmt::Display for AsilLevel {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            AsilLevel::Qm => write!(f, "QM"),
-            AsilLevel::AsilA => write!(f, "ASIL A"),
-            AsilLevel::AsilB => write!(f, "ASIL B"),
-            AsilLevel::AsilC => write!(f, "ASIL C"),
-            AsilLevel::AsilD => write!(f, "ASIL D"),
-        }
-    }
-}
-
-// ─── IEC 62304 (Medical Device Software) ───────────────────────────────
-
-/// Software safety classification per IEC 62304.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub enum Iec62304Class {
-    /// No injury or damage to health possible.
-    ClassA,
-    /// Non-serious injury possible.
-    ClassB,
-    /// Death or serious injury possible.
-    ClassC,
-}
-
-impl std::fmt::Display for Iec62304Class {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Iec62304Class::ClassA => write!(f, "IEC 62304 Class A"),
-            Iec62304Class::ClassB => write!(f, "IEC 62304 Class B"),
-            Iec62304Class::ClassC => write!(f, "IEC 62304 Class C"),
-        }
-    }
-}
-
-/// IEC 62304 required activities per software safety class.
-pub fn iec62304_required_activities(class: Iec62304Class) -> Vec<&'static str> {
-    match class {
-        Iec62304Class::ClassA => vec![
-            "5.1: Software development planning",
-            "5.2: Software requirements analysis",
-            "5.8: Software release",
-            "6.1: Software maintenance plan",
-            "7.1: Software risk management",
-            "8.1: Software configuration management",
-        ],
-        Iec62304Class::ClassB => vec![
-            "5.1: Software development planning",
-            "5.2: Software requirements analysis",
-            "5.3: Software architectural design",
-            "5.5: Software integration and integration testing",
-            "5.7: Software system testing",
-            "5.8: Software release",
-            "6.1: Software maintenance plan",
-            "7.1-7.4: Software risk management",
-            "8.1-8.3: Software configuration management",
-            "9.8: Software problem resolution",
-        ],
-        Iec62304Class::ClassC => vec![
-            "5.1: Software development planning",
-            "5.2: Software requirements analysis",
-            "5.3: Software architectural design",
-            "5.4: Software detailed design",
-            "5.5: Software integration and integration testing",
-            "5.6: Software verification",
-            "5.7: Software system testing",
-            "5.8: Software release",
-            "6.1: Software maintenance plan",
-            "7.1-7.4: Software risk management (full)",
-            "8.1-8.3: Software configuration management (full)",
-            "9.1-9.8: Software problem resolution (full)",
-        ],
-    }
-}
-
-// ─── DO-178C (Avionics) ────────────────────────────────────────────────
-
-/// Design Assurance Level per DO-178C.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub enum DalLevel {
-    /// Catastrophic failure condition.
-    DalA,
-    /// Hazardous/Severe-Major failure condition.
-    DalB,
-    /// Major failure condition.
-    DalC,
-    /// Minor failure condition.
-    DalD,
-    /// No effect on aircraft safety.
-    DalE,
-}
-
-impl std::fmt::Display for DalLevel {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            DalLevel::DalA => write!(f, "DAL A"),
-            DalLevel::DalB => write!(f, "DAL B"),
-            DalLevel::DalC => write!(f, "DAL C"),
-            DalLevel::DalD => write!(f, "DAL D"),
-            DalLevel::DalE => write!(f, "DAL E"),
-        }
-    }
-}
-
-// ─── IEC 61508 (Industrial) ────────────────────────────────────────────
-
-/// Safety Integrity Level per IEC 61508.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
-pub enum SilLevel {
-    Sil1,
-    Sil2,
-    Sil3,
-    Sil4,
-}
-
-impl std::fmt::Display for SilLevel {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            SilLevel::Sil1 => write!(f, "SIL 1"),
-            SilLevel::Sil2 => write!(f, "SIL 2"),
-            SilLevel::Sil3 => write!(f, "SIL 3"),
-            SilLevel::Sil4 => write!(f, "SIL 4"),
-        }
-    }
-}
-
-// ─── Unified Safety Classification ─────────────────────────────────────
-
-/// Multi-standard safety classification for a tool or component.
-/// A tool can be classified under multiple standards simultaneously.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SafetyClassification {
-    pub automotive: Option<AsilLevel>,
-    pub medical: Option<Iec62304Class>,
-    pub avionics: Option<DalLevel>,
-    pub industrial: Option<SilLevel>,
-}
-
-impl SafetyClassification {
-    pub fn automotive(asil: AsilLevel) -> Self {
-        Self {
-            automotive: Some(asil),
-            medical: None,
-            avionics: None,
-            industrial: None,
-        }
-    }
-}
-
-// ─── OSS / SOUP Qualification ──────────────────────────────────────────
-
-/// OSS component qualification record.
-/// Covers ISO 26262 Part 8 Clause 12 (SW component qualification)
-/// and IEC 62304 SOUP management.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct OssQualification {
-    pub component_name: String,
-    pub version: String,
-    pub license: String,
-    pub repository_url: String,
-    /// Is this component developed with a known process?
-    pub development_process_known: bool,
-    /// Has the development process been evaluated?
-    pub process_evaluation: Option<ProcessEvaluation>,
-    /// Known anomalies / CVEs relevant to our use case.
-    pub known_anomalies: Vec<KnownAnomaly>,
-    /// How this component is used in the safety context.
-    pub usage_context: String,
-    /// What happens if this component fails?
-    pub failure_impact: String,
-    /// Risk control measures for component failure.
-    pub risk_controls: Vec<String>,
-    /// Verification evidence specific to this component.
-    pub verification_evidence: Vec<QualificationEvidence>,
-}
-
-/// Evaluation of an OSS project's development process.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ProcessEvaluation {
-    pub has_ci: bool,
-    pub has_tests: bool,
-    pub has_formal_verification: bool,
-    pub has_code_review: bool,
-    pub has_release_process: bool,
-    pub has_issue_tracking: bool,
-    pub has_documentation: bool,
-    pub evaluation_date: DateTime<Utc>,
-    pub evaluator: String,
-    pub notes: String,
-}
-
-/// A known anomaly (bug, CVE, limitation) in a component.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct KnownAnomaly {
-    pub id: String,
-    pub description: String,
-    pub severity: AnomalySeverity,
-    pub affects_safety: bool,
-    pub mitigation: Option<String>,
-    pub status: AnomalyStatus,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub enum AnomalySeverity {
-    Low,
-    Medium,
-    High,
-    Critical,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub enum AnomalyStatus {
-    Open,
-    Mitigated,
-    Fixed,
-    WontFix,
-    NotApplicable,
-}
-
-// ─── SOUP Registry (IEC 62304) ─────────────────────────────────────────
-
-/// SOUP (Software of Unknown Provenance) item per IEC 62304 §8.1.2.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SoupItem {
-    pub name: String,
-    pub version: String,
-    pub manufacturer: String,
-    pub unique_id: String,
-    pub known_anomalies: Vec<KnownAnomaly>,
-    /// Functional and performance requirements relevant to safety.
-    pub requirements: Vec<String>,
-    /// Hardware/software compatibility requirements.
-    pub compatibility: Vec<String>,
-}
-
-/// Full SOUP registry for a project.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SoupRegistry {
-    pub items: Vec<SoupItem>,
-    pub last_updated: DateTime<Utc>,
-}
-
-impl SoupRegistry {
-    pub fn new() -> Self {
-        Self {
-            items: Vec::new(),
-            last_updated: Utc::now(),
-        }
-    }
-}
-
-impl Default for SoupRegistry {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-// ─── ASPICE Process Reference ──────────────────────────────────────────
-
-/// Automotive SPICE process areas relevant to the automator.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum AspiceProcess {
-    /// SWE.1: Software Requirements Analysis
-    Swe1,
-    /// SWE.2: Software Architectural Design
-    Swe2,
-    /// SWE.3: Software Detailed Design and Unit Construction
-    Swe3,
-    /// SWE.4: Software Unit Verification
-    Swe4,
-    /// SWE.5: Software Integration and Integration Test
-    Swe5,
-    /// SWE.6: Software Qualification Test
-    Swe6,
-    /// SUP.8: Configuration Management
-    Sup8,
-    /// SUP.9: Problem Resolution Management
-    Sup9,
-    /// SUP.10: Change Request Management
-    Sup10,
-    /// MAN.3: Project Management
-    Man3,
-}
-
-impl std::fmt::Display for AspiceProcess {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            AspiceProcess::Swe1 => write!(f, "SWE.1 SW Requirements Analysis"),
-            AspiceProcess::Swe2 => write!(f, "SWE.2 SW Architectural Design"),
-            AspiceProcess::Swe3 => write!(f, "SWE.3 SW Detailed Design & Unit Construction"),
-            AspiceProcess::Swe4 => write!(f, "SWE.4 SW Unit Verification"),
-            AspiceProcess::Swe5 => write!(f, "SWE.5 SW Integration & Integration Test"),
-            AspiceProcess::Swe6 => write!(f, "SWE.6 SW Qualification Test"),
-            AspiceProcess::Sup8 => write!(f, "SUP.8 Configuration Management"),
-            AspiceProcess::Sup9 => write!(f, "SUP.9 Problem Resolution Management"),
-            AspiceProcess::Sup10 => write!(f, "SUP.10 Change Request Management"),
-            AspiceProcess::Man3 => write!(f, "MAN.3 Project Management"),
-        }
-    }
-}
-
-/// Map automator pipeline stages to ASPICE processes.
-pub fn pipeline_aspice_mapping() -> Vec<(String, AspiceProcess)> {
-    vec![
-        ("Planner".into(), AspiceProcess::Swe1),
-        ("Planner (architecture)".into(), AspiceProcess::Swe2),
-        ("Implementer".into(), AspiceProcess::Swe3),
-        ("Gate 1: Unit tests".into(), AspiceProcess::Swe4),
-        ("Gate 3: Integration tests".into(), AspiceProcess::Swe5),
-        ("Release: Qualification tests".into(), AspiceProcess::Swe6),
-        ("Git operations".into(), AspiceProcess::Sup8),
-        ("Task rejection/feedback".into(), AspiceProcess::Sup9),
-        ("Task queue management".into(), AspiceProcess::Sup10),
-        ("Budget/status tracking".into(), AspiceProcess::Man3),
-    ]
-}
-
-// ─── Safety Configuration ──────────────────────────────────────────────
-
-/// Top-level safety configuration (parsed from safety.toml).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SafetyConfig {
-    pub tools: Vec<ToolSafetyConfig>,
-    pub soup_items: Vec<SoupItem>,
-}
-
-/// Per-tool safety configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ToolSafetyConfig {
-    pub name: String,
-    pub ti: ToolImpact,
-    pub td: ToolDetection,
-    pub classification: SafetyClassification,
-    pub is_oss: bool,
-    pub repository_url: Option<String>,
-    pub license: Option<String>,
-    pub use_cases: Vec<ToolUseCase>,
-}
-
-impl ToolSafetyConfig {
-    /// Compute TCL from TI and TD.
-    pub fn tcl(&self) -> Tcl {
-        determine_tcl(self.ti, self.td)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn tcl_matrix() {
-        // TI1 always yields TCL1
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti1, ToolDetection::Td1),
-            Tcl::Tcl1
-        );
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti1, ToolDetection::Td2),
-            Tcl::Tcl1
-        );
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti1, ToolDetection::Td3),
-            Tcl::Tcl1
-        );
-
-        // TI2 depends on TD
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti2, ToolDetection::Td1),
-            Tcl::Tcl1
-        );
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti2, ToolDetection::Td2),
-            Tcl::Tcl2
-        );
-        assert_eq!(
-            determine_tcl(ToolImpact::Ti2, ToolDetection::Td3),
-            Tcl::Tcl3
-        );
-    }
-
-    #[test]
-    fn tcl_ordering() {
-        assert!(Tcl::Tcl1 < Tcl::Tcl2);
-        assert!(Tcl::Tcl2 < Tcl::Tcl3);
-    }
-
-    #[test]
-    fn qualification_methods_by_tcl() {
-        assert!(qualification_methods(Tcl::Tcl1).is_empty());
-        assert!(!qualification_methods(Tcl::Tcl2).is_empty());
-        assert!(!qualification_methods(Tcl::Tcl3).is_empty());
-        // TCL3 requires more methods than TCL2
-        // (TCL3 excludes "increased confidence from use")
-        assert!(qualification_methods(Tcl::Tcl3).len() < qualification_methods(Tcl::Tcl2).len());
-    }
-
-    #[test]
-    fn asil_ordering() {
-        assert!(AsilLevel::Qm < AsilLevel::AsilA);
-        assert!(AsilLevel::AsilA < AsilLevel::AsilB);
-        assert!(AsilLevel::AsilD > AsilLevel::AsilC);
-    }
-
-    #[test]
-    fn iec62304_class_c_requires_all() {
-        let activities = iec62304_required_activities(Iec62304Class::ClassC);
-        assert!(activities.len() > iec62304_required_activities(Iec62304Class::ClassA).len());
-    }
-
-    #[test]
-    fn tool_config_tcl() {
-        let config = ToolSafetyConfig {
-            name: "loom".into(),
-            ti: ToolImpact::Ti2,
-            td: ToolDetection::Td2,
-            classification: SafetyClassification::automotive(AsilLevel::AsilB),
-            is_oss: true,
-            repository_url: Some("https://github.com/example/loom".into()),
-            license: Some("Apache-2.0".into()),
-            use_cases: vec![],
-        };
-        assert_eq!(config.tcl(), Tcl::Tcl2);
-    }
-
-    #[test]
-    fn aspice_mapping_covers_pipeline() {
-        let mapping = pipeline_aspice_mapping();
-        // Should cover SWE.1 through SWE.6
-        let processes: Vec<_> = mapping.iter().map(|(_, p)| p).collect();
-        assert!(processes.contains(&&AspiceProcess::Swe1));
-        assert!(processes.contains(&&AspiceProcess::Swe4));
-        assert!(processes.contains(&&AspiceProcess::Swe6));
-    }
-}
diff --git a/crates/thrum-core/src/spec.rs b/crates/thrum-core/src/spec.rs
index e07ef7d..8496459 100644
--- a/crates/thrum-core/src/spec.rs
+++ b/crates/thrum-core/src/spec.rs
@@ -185,6 +185,110 @@ impl Spec {
     pub fn from_toml(content: &str) -> anyhow::Result<Self> {
         Ok(toml::from_str(content)?)
     }
+
+    /// Serialize the spec to TOML.
+    pub fn to_toml(&self) -> anyhow::Result<String> {
+        Ok(toml::to_string_pretty(self)?)
+    }
+
+    /// Extract all requirement IDs from this spec.
+    pub fn requirement_ids(&self) -> Vec<String> {
+        self.requirements.iter().map(|r| r.id.clone()).collect()
+    }
+
+    /// Verify that the implementation matches the spec's expectations.
+    ///
+    /// Returns a list of compliance issues (empty = fully compliant).
+    /// Checks:
+    /// 1. `affected_files` — each must substring-match (either way) a non-empty changed path
+    /// 2. `proof_obligations` — each proof_file should exist under `repo_root`
+    pub fn verify_implementation(
+        &self,
+        actual_files_changed: &[String],
+        repo_root: &std::path::Path,
+    ) -> Vec<SpecComplianceIssue> {
+        let mut issues = Vec::new();
+
+        // Affected files (Warning). Skip empty changed paths: "" is a substring of everything.
+        for expected in &self.design.affected_files {
+            let found = actual_files_changed
+                .iter()
+                .any(|f| !f.is_empty() && (f.contains(expected) || expected.contains(f)));
+            if !found {
+                issues.push(SpecComplianceIssue {
+                    category: ComplianceCategory::AffectedFile,
+                    message: format!(
+                        "spec declares affected file '{}' but it was not modified",
+                        expected
+                    ),
+                    severity: ComplianceSeverity::Warning,
+                });
+            }
+        }
+
+        // Proof obligations (Error): each declared proof_file must exist under repo_root
+        for po in &self.proof_obligations {
+            if let Some(ref proof_file) = po.proof_file {
+                let path = repo_root.join(proof_file);
+                if !path.exists() {
+                    issues.push(SpecComplianceIssue {
+                        category: ComplianceCategory::ProofFile,
+                        message: format!(
+                            "proof obligation '{}' expects file '{}' but it does not exist",
+                            po.property, proof_file
+                        ),
+                        severity: ComplianceSeverity::Error,
+                    });
+                }
+            }
+        }
+
+        issues
+    }
+
+    /// Convert spec acceptance criteria to verification-tagged criteria.
+    ///
+    /// Uses the enrichment logic from the verification module to auto-tag
+    /// criteria that don't already have tags.
+    pub fn tagged_acceptance_criteria(&self) -> Vec<String> {
+        crate::verification::enrich_criteria(&self.acceptance_criteria)
+    }
+}
+
+/// Result of verifying an implementation against a spec.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SpecComplianceIssue {
+    pub category: ComplianceCategory,
+    pub message: String,
+    pub severity: ComplianceSeverity,
+}
+
+/// What aspect of spec compliance was checked.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ComplianceCategory {
+    /// Expected file was not modified.
+    AffectedFile,
+    /// Required proof file is missing.
+    ProofFile,
+}
+
+/// How critical the compliance issue is.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ComplianceSeverity {
+    /// Non-blocking: may indicate spec was over-specified.
+    Warning,
+    /// Blocking: proof obligations must be met.
+    Error,
+}
+
+impl std::fmt::Display for SpecComplianceIssue {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let level = match self.severity {
+            ComplianceSeverity::Warning => "WARN",
+            ComplianceSeverity::Error => "ERROR",
+        };
+        write!(f, "[{level}] {}", self.message)
+    }
 }
 
 #[cfg(test)]
@@ -255,4 +359,114 @@ mod tests {
         assert_eq!(format!("{}", Priority::P0), "P0");
         assert_eq!(format!("{}", Priority::P3), "P3");
     }
+
+    #[test]
+    fn spec_toml_roundtrip() {
+        let spec = Spec {
+            title: "Test TOML roundtrip".into(),
+            context: "Testing serialization".into(),
+            requirements: vec![SpecRequirement {
+                id: "REQ-TEST-001".into(),
+                description: "Must roundtrip".into(),
+                rationale: "Testing".into(),
+                priority: Priority::P1,
+                safety_relevance: None,
+            }],
+            design: DesignSpec {
+                approach: "Direct serialization".into(),
+                affected_files: vec!["src/lib.rs".into()],
+                interfaces: Vec::new(),
+                constraints: Vec::new(),
+            },
+            acceptance_criteria: vec!["TOML roundtrip works (TEST)".into()],
+            proof_obligations: Vec::new(),
+            test_plan: Vec::new(),
+        };
+        let toml_str = spec.to_toml().unwrap();
+        let parsed = Spec::from_toml(&toml_str).unwrap();
+        assert_eq!(parsed.title, spec.title);
+        assert_eq!(parsed.requirements.len(), 1);
+        assert_eq!(parsed.requirements[0].id, "REQ-TEST-001");
+    }
+
+    #[test]
+    fn requirement_ids_extraction() {
+        let spec = Spec {
+            requirements: vec![
+                SpecRequirement {
+                    id: "REQ-001".into(),
+                    description: "First".into(),
+                    rationale: String::new(),
+                    priority: Priority::P1,
+                    safety_relevance: None,
+                },
+                SpecRequirement {
+                    id: "REQ-002".into(),
+                    description: "Second".into(),
+                    rationale: String::new(),
+                    priority: Priority::P2,
+                    safety_relevance: None,
+                },
+            ],
+            ..Default::default()
+        };
+        assert_eq!(spec.requirement_ids(), vec!["REQ-001", "REQ-002"]);
+    }
+
+    #[test]
+    fn verify_implementation_affected_files() {
+        let spec = Spec {
+            design: DesignSpec {
+                affected_files: vec!["src/lib.rs".into(), "src/main.rs".into()],
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+
+        let changed = vec!["src/lib.rs".into()];
+        let issues = spec.verify_implementation(&changed, std::path::Path::new("/nonexistent"));
+        // src/main.rs was not changed → warning
+        assert_eq!(issues.len(), 1);
+        assert_eq!(issues[0].severity, ComplianceSeverity::Warning);
+        assert!(issues[0].message.contains("src/main.rs"));
+    }
+
+    #[test]
+    fn verify_implementation_proof_files() {
+        let tmp = std::env::temp_dir().join("thrum-spec-test");
+        let _ = std::fs::create_dir_all(&tmp);
+
+        let spec = Spec {
+            proof_obligations: vec![ProofObligation {
+                property: "test property".into(),
+                prover: "Z3".into(),
+                proof_file: Some("nonexistent_proof.z3".into()),
+            }],
+            ..Default::default()
+        };
+
+        let issues = spec.verify_implementation(&[], &tmp);
+        assert_eq!(issues.len(), 1);
+        assert_eq!(issues[0].severity, ComplianceSeverity::Error);
+        assert!(issues[0].message.contains("nonexistent_proof.z3"));
+
+        let _ = std::fs::remove_dir_all(&tmp);
+    }
+
+    #[test]
+    fn spec_tagged_acceptance_criteria() {
+        let spec = Spec {
+            acceptance_criteria: vec![
+                "Tests pass (TEST)".into(),
+                "Dashboard shows results".into(), // should get auto-tagged
+            ],
+            ..Default::default()
+        };
+        let tagged = spec.tagged_acceptance_criteria();
+        assert_eq!(tagged.len(), 2);
+        // First already has tag
+        assert!(tagged[0].contains("(TEST)"));
+        // Second should be enriched with a tag
+        assert!(tagged[1].contains('('));
+    }
 }
diff --git a/crates/thrum-core/src/sphinx_needs.rs b/crates/thrum-core/src/sphinx_needs.rs
deleted file mode 100644
index b8d517f..0000000
--- a/crates/thrum-core/src/sphinx_needs.rs
+++ /dev/null
@@ -1,588 +0,0 @@
-//! Export traceability data in sphinx-needs format.
-//!
-//! Generates `needs.json` files compatible with sphinx-needs `needimport` directive.
-//! This enables full requirements traceability documentation built by Sphinx.
-//!
-//! V-model chain: REQ → ARCH → IMPL → UTEST → PROOF → ITEST → REVIEW → VERIF → REL
-//!
-//! Each maps to an ASPICE SWE process:
-//!   req → SWE.1, arch → SWE.2, impl → SWE.3, utest → SWE.4,
-//!   itest → SWE.5, qtest → SWE.6
-
-use crate::traceability::{TraceArtifact, TraceRecord};
-use chrono::Utc;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-
-// ─── Need Types (matching sphinx-needs configuration) ──────────────────
-
-/// All need types in the traceability chain.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum NeedType {
-    Req,
-    Arch,
-    Design,
-    Impl,
-    Utest,
-    Itest,
-    Proof,
-    Review,
-    Verif,
-    Rel,
-}
-
-impl NeedType {
-    pub fn label(&self) -> &'static str {
-        match self {
-            NeedType::Req => "Requirement",
-            NeedType::Arch => "Architecture",
-            NeedType::Design => "Detailed Design",
-            NeedType::Impl => "Implementation",
-            NeedType::Utest => "Unit Test",
-            NeedType::Itest => "Integration Test",
-            NeedType::Proof => "Formal Proof",
-            NeedType::Review => "Code Review",
-            NeedType::Verif => "Verification Report",
-            NeedType::Rel => "Release Artifact",
-        }
-    }
-
-    pub fn directive(&self) -> &'static str {
-        match self {
-            NeedType::Req => "req",
-            NeedType::Arch => "arch",
-            NeedType::Design => "design",
-            NeedType::Impl => "impl",
-            NeedType::Utest => "utest",
-            NeedType::Itest => "itest",
-            NeedType::Proof => "proof",
-            NeedType::Review => "review",
-            NeedType::Verif => "verif",
-            NeedType::Rel => "rel",
-        }
-    }
-
-    /// ASPICE process this need type corresponds to.
-    pub fn aspice_process(&self) -> &'static str {
-        match self {
-            NeedType::Req => "SWE.1",
-            NeedType::Arch => "SWE.2",
-            NeedType::Design => "SWE.3",
-            NeedType::Impl => "SWE.3",
-            NeedType::Utest => "SWE.4",
-            NeedType::Itest => "SWE.5",
-            NeedType::Proof => "SWE.4",
-            NeedType::Review => "SWE.4",
-            NeedType::Verif => "SWE.6",
-            NeedType::Rel => "SWE.6",
-        }
-    }
-}
-
-// ─── Need (sphinx-needs compatible) ────────────────────────────────────
-
-/// A single need item compatible with sphinx-needs JSON format.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Need {
-    /// Unique identifier, e.g. "REQ_LOOM_042", "IMPL_LOOM_042_1".
-    pub id: String,
-    /// Need type (req, arch, impl, etc.).
-    #[serde(rename = "type")]
-    pub need_type: String,
-    /// Human-readable title.
-    pub title: String,
-    /// Description / content.
-    pub description: String,
-    /// Current status.
-    pub status: String,
-    /// Links to other needs (traceability).
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub links: Vec<String>,
-    /// Tags for filtering.
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub tags: Vec<String>,
-    /// Custom fields.
-    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
-    pub extra: HashMap<String, String>,
-}
-
-// ─── needs.json export format ──────────────────────────────────────────
-
-/// Top-level structure of a needs.json file (sphinx-needs needimport format).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct NeedsJson {
-    /// Project name.
-    pub project: String,
-    /// Export version.
-    pub version: String,
-    /// All needs keyed by their ID.
-    pub needs: HashMap<String, Need>,
-    /// Metadata about the export.
-    #[serde(default)]
-    pub created: String,
-}
-
-impl NeedsJson {
-    pub fn new(project: &str, version: &str) -> Self {
-        Self {
-            project: project.to_string(),
-            version: version.to_string(),
-            needs: HashMap::new(),
-            created: Utc::now().to_rfc3339(),
-        }
-    }
-
-    pub fn add(&mut self, need: Need) {
-        self.needs.insert(need.id.clone(), need);
-    }
-
-    /// Export to JSON string.
-    pub fn to_json(&self) -> anyhow::Result<String> {
-        Ok(serde_json::to_string_pretty(self)?)
-    }
-}
-
-// ─── Conversion from TraceRecord to Need ───────────────────────────────
-
-/// Convert a TraceRecord into one or more sphinx-needs Need items.
-pub fn trace_record_to_needs(record: &TraceRecord) -> Vec<Need> {
-    let base_id = sanitize_id(&record.requirement_id);
-    let task_suffix = format!("T{}", record.task_id);
-
-    match &record.artifact {
-        TraceArtifact::Requirement { title, description } => {
-            vec![Need {
-                id: base_id,
-                need_type: NeedType::Req.directive().to_string(),
-                title: title.clone(),
-                description: description.clone(),
-                status: "open".into(),
-                links: vec![],
-                tags: vec!["requirement".into()],
-                extra: HashMap::new(),
-            }]
-        }
-        TraceArtifact::Design { rationale } => {
-            let id = format!("{base_id}_ARCH_{task_suffix}");
-            vec![Need {
-                id,
-                need_type: NeedType::Arch.directive().to_string(),
-                title: format!("Architecture for {}", record.requirement_id),
-                description: rationale.clone(),
-                status: "open".into(),
-                links: vec![base_id],
-                tags: vec!["architecture".into()],
-                extra: HashMap::new(),
-            }]
-        }
-        TraceArtifact::Implementation {
-            branch,
-            commit_sha,
-            files_changed,
-        } => {
-            let id = format!("{base_id}_IMPL_{task_suffix}");
-            let mut extra = HashMap::new();
-            extra.insert("branch".into(), branch.clone());
-            if let Some(sha) = commit_sha {
-                extra.insert("commit".into(), sha.clone());
-            }
-            extra.insert("files".into(), files_changed.join(", "));
-
-            vec![Need {
-                id,
-                need_type: NeedType::Impl.directive().to_string(),
-                title: format!("Implementation for {}", record.requirement_id),
-                description: format!("Branch: {}\nFiles: {}", branch, files_changed.join(", ")),
-                status: "implemented".into(),
-                links: vec![base_id],
-                tags: vec!["implementation".into()],
-                extra,
-            }]
-        }
-        TraceArtifact::Test {
-            gate_level,
-            passed,
-            report_json,
-        } => {
-            let need_type = if gate_level.contains("Integration") || gate_level.contains("3") {
-                NeedType::Itest
-            } else {
-                NeedType::Utest
-            };
-            let id = format!(
-                "{}_{}_{}",
-                base_id,
-                need_type.directive().to_uppercase(),
-                task_suffix
-            );
-            let status = if *passed { "passed" } else { "failed" };
-
-            let mut extra = HashMap::new();
-            extra.insert("gate_level".into(), gate_level.clone());
-            // Store a truncated version of the report
-            if report_json.len() <= 1000 {
-                extra.insert("report".into(), report_json.clone());
-            }
-
-            vec![Need {
-                id,
-                need_type: need_type.directive().to_string(),
-                title: format!("{} for {}", need_type.label(), record.requirement_id),
-                description: format!("Gate: {} — {}", gate_level, status),
-                status: status.into(),
-                links: vec![base_id],
-                tags: vec!["test".into(), gate_level.clone()],
-                extra,
-            }]
-        }
-        TraceArtifact::Proof {
-            prover,
-            passed,
-            report_json,
-        } => {
-            let id = format!("{base_id}_PROOF_{task_suffix}");
-            let status = if *passed { "verified" } else { "failed" };
-            let mut extra = HashMap::new();
-            extra.insert("prover".into(), prover.clone());
-            if report_json.len() <= 1000 {
-                extra.insert("report".into(), report_json.clone());
-            }
-
-            vec![Need {
-                id,
-                need_type: NeedType::Proof.directive().to_string(),
-                title: format!("Formal proof ({}) for {}", prover, record.requirement_id),
-                description: format!("Prover: {} — {}", prover, status),
-                status: status.into(),
-                links: vec![base_id],
-                tags: vec!["proof".into(), prover.clone()],
-                extra,
-            }]
-        }
-        TraceArtifact::Review {
-            reviewer,
-            approved,
-            comments,
-        } => {
-            let id = format!("{base_id}_REVIEW_{task_suffix}");
-            let status = if *approved {
-                "approved"
-            } else {
-                "changes_requested"
-            };
-
-            vec![Need {
-                id,
-                need_type: NeedType::Review.directive().to_string(),
-                title: format!("Code review for {}", record.requirement_id),
-                description: comments.clone(),
-                status: status.into(),
-                links: vec![base_id],
-                tags: vec!["review".into(), reviewer.clone()],
-                extra: HashMap::from([("reviewer".into(), reviewer.clone())]),
-            }]
-        }
-        TraceArtifact::Release {
-            version,
-            targets,
-            checksums,
-        } => {
-            let id = format!("{base_id}_REL_{}", sanitize_id(version));
-            let mut extra = HashMap::new();
-            extra.insert("targets".into(), targets.join(", "));
-            for (file, hash) in checksums {
-                extra.insert(format!("sha256_{file}"), hash.clone());
-            }
-
-            vec![Need {
-                id,
-                need_type: NeedType::Rel.directive().to_string(),
-                title: format!("Release {} for {}", version, record.requirement_id),
-                description: format!("Targets: {}", targets.join(", ")),
-                status: "released".into(),
-                links: vec![base_id],
-                tags: vec!["release".into(), version.clone()],
-                extra,
-            }]
-        }
-    }
-}
-
-/// Generate sphinx-needs type configuration for conf.py.
-pub fn needs_types_config() -> Vec<NeedTypeConfig> {
-    vec![
-        NeedTypeConfig {
-            directive: "req".into(),
-            title: "Requirement".into(),
-            prefix: "REQ_".into(),
-            color: "#BFD8D2".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "arch".into(),
-            title: "Architecture".into(),
-            prefix: "ARCH_".into(),
-            color: "#DCFAC0".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "design".into(),
-            title: "Detailed Design".into(),
-            prefix: "DES_".into(),
-            color: "#C0E0FA".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "impl".into(),
-            title: "Implementation".into(),
-            prefix: "IMPL_".into(),
-            color: "#FED8B1".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "utest".into(),
-            title: "Unit Test".into(),
-            prefix: "UT_".into(),
-            color: "#D5E8D4".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "itest".into(),
-            title: "Integration Test".into(),
-            prefix: "IT_".into(),
-            color: "#DAE8FC".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "proof".into(),
-            title: "Formal Proof".into(),
-            prefix: "PRF_".into(),
-            color: "#E1D5E7".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "review".into(),
-            title: "Code Review".into(),
-            prefix: "RVW_".into(),
-            color: "#FFF2CC".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "verif".into(),
-            title: "Verification Report".into(),
-            prefix: "VER_".into(),
-            color: "#F8CECC".into(),
-            style: "node".into(),
-        },
-        NeedTypeConfig {
-            directive: "rel".into(),
-            title: "Release Artifact".into(),
-            prefix: "REL_".into(),
-            color: "#B0E0E6".into(),
-            style: "node".into(),
-        },
-    ]
-}
-
-/// Configuration for a sphinx-needs type (used in conf.py generation).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct NeedTypeConfig {
-    pub directive: String,
-    pub title: String,
-    pub prefix: String,
-    pub color: String,
-    pub style: String,
-}
-
-/// Generate the `needs_types` Python list for conf.py.
-pub fn generate_conf_py_needs_types() -> String {
-    let types = needs_types_config();
-    let mut lines = vec!["needs_types = [".to_string()];
-    for t in &types {
-        lines.push(format!(
-            "    dict(directive=\"{}\", title=\"{}\", prefix=\"{}\", color=\"{}\", style=\"{}\"),",
-            t.directive, t.title, t.prefix, t.color, t.style
-        ));
-    }
-    lines.push("]".to_string());
-    lines.join("\n")
-}
-
-/// Generate RST content for a traceability overview page.
-pub fn generate_traceability_rst(tool_name: &str) -> String {
-    let title = format!("{tool_name} Traceability");
-    let underline = "=".repeat(title.len());
-    format!(
-        r#"{title}
-{underline}
-
-.. needimport:: needs.json
-
-Requirements
-------------
-.. needlist::
-   :types: req
-   :style: table
-
-Architecture
-------------
-.. needlist::
-   :types: arch
-   :style: table
-
-Implementation
---------------
-.. needlist::
-   :types: impl
-   :style: table
-
-Unit Tests
-----------
-.. needlist::
-   :types: utest
-   :style: table
-
-Formal Proofs
--------------
-.. needlist::
-   :types: proof
-   :style: table
-
-Integration Tests
------------------
-.. needlist::
-   :types: itest
-   :style: table
-
-Reviews
--------
-.. needlist::
-   :types: review
-   :style: table
-
-Traceability Flow
------------------
-.. needflow::
-   :filter: type in ['req', 'arch', 'impl', 'utest', 'proof', 'itest', 'review', 'rel']
-
-Traceability Matrix
--------------------
-.. needtable::
-   :columns: id;title;type;status;links
-   :style: table
-"#,
-    )
-}
-
-/// Sanitize a string for use as a sphinx-needs ID (alphanumeric + underscores).
-fn sanitize_id(s: &str) -> String {
-    s.chars()
-        .map(|c| {
-            if c.is_alphanumeric() {
-                c.to_ascii_uppercase()
-            } else {
-                '_'
-            }
-        })
-        .collect::<String>()
-        .trim_matches('_')
-        .to_string()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanitize_id_works() {
-        assert_eq!(sanitize_id("REQ-LOOM-042"), "REQ_LOOM_042");
-        assert_eq!(sanitize_id("some.thing/here"), "SOME_THING_HERE");
-    }
-
-    #[test]
-    fn need_type_labels() {
-        assert_eq!(NeedType::Req.label(), "Requirement");
-        assert_eq!(NeedType::Proof.label(), "Formal Proof");
-        assert_eq!(NeedType::Req.aspice_process(), "SWE.1");
-        assert_eq!(NeedType::Itest.aspice_process(), "SWE.5");
-    }
-
-    #[test]
-    fn needs_json_roundtrip() {
-        let mut nj = NeedsJson::new("pulseengine", "0.1.0");
-        nj.add(Need {
-            id: "REQ_LOOM_042".into(),
-            need_type: "req".into(),
-            title: "Add i32.popcnt".into(),
-            description: "Support i32.popcnt in the optimization pipeline".into(),
-            status: "open".into(),
-            links: vec![],
-            tags: vec!["loom".into()],
-            extra: HashMap::new(),
-        });
-
-        let json = nj.to_json().unwrap();
-        let parsed: NeedsJson = serde_json::from_str(&json).unwrap();
-        assert_eq!(parsed.needs.len(), 1);
-        assert!(parsed.needs.contains_key("REQ_LOOM_042"));
-    }
-
-    #[test]
-    fn trace_record_to_need_requirement() {
-        let record = TraceRecord {
-            id: 1,
-            task_id: 42,
-            requirement_id: "REQ-LOOM-042".into(),
-            artifact: TraceArtifact::Requirement {
-                title: "Add i32.popcnt".into(),
-                description: "Support popcount".into(),
-            },
-            created_at: Utc::now(),
-        };
-
-        let needs = trace_record_to_needs(&record);
-        assert_eq!(needs.len(), 1);
-        assert_eq!(needs[0].id, "REQ_LOOM_042");
-        assert_eq!(needs[0].need_type, "req");
-    }
-
-    #[test]
-    fn trace_record_to_need_impl() {
-        let record = TraceRecord {
-            id: 2,
-            task_id: 42,
-            requirement_id: "REQ-LOOM-042".into(),
-            artifact: TraceArtifact::Implementation {
-                branch: "auto/TASK-0042/loom/add-popcnt".into(),
-                commit_sha: Some("abc123".into()),
-                files_changed: vec!["src/lib.rs".into()],
-            },
-            created_at: Utc::now(),
-        };
-
-        let needs = trace_record_to_needs(&record);
-        assert_eq!(needs.len(), 1);
-        assert_eq!(needs[0].need_type, "impl");
-        // Links back to the requirement
-        assert_eq!(needs[0].links, vec!["REQ_LOOM_042"]);
-        assert_eq!(needs[0].extra.get("commit").unwrap(), "abc123");
-    }
-
-    #[test]
-    fn conf_py_generation() {
-        let conf = generate_conf_py_needs_types();
-        assert!(conf.contains("needs_types = ["));
-        assert!(conf.contains("directive=\"req\""));
-        assert!(conf.contains("directive=\"proof\""));
-        assert!(conf.contains("directive=\"rel\""));
-    }
-
-    #[test]
-    fn traceability_rst_generation() {
-        let rst = generate_traceability_rst("loom");
-        assert!(rst.contains("needimport:: needs.json"));
-        assert!(rst.contains("needflow::"));
-        assert!(rst.contains("needtable::"));
-    }
-}
diff --git a/crates/thrum-core/src/sync.rs b/crates/thrum-core/src/sync.rs
new file mode 100644
index 0000000..04d24da
--- /dev/null
+++ b/crates/thrum-core/src/sync.rs
@@ -0,0 +1,275 @@
+//! Remote sync point types for keeping local branches in sync with remote.
+//!
+//! After a PR is merged to remote main, all local branches need updating.
+//! A "sync point" represents this operation and its results.
+
+use crate::task::{RepoName, TaskId};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+/// How aggressively to sync local main with remote after PR merges (variant names serialize in lowercase).
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum SyncStrategy {
+    /// Sync immediately after every PR merge. This is the `Default` variant.
+    #[default]
+    Eager,
+    /// Batch syncs after N merges or on a timer.
+    Batched {
+        /// Number of merges before triggering a sync (3 when the key is omitted).
+        #[serde(default = "default_batch_count")]
+        batch_count: u32,
+        /// Maximum seconds between syncs, the timer-based trigger (300 when the key is omitted).
+        #[serde(default = "default_batch_interval_secs")]
+        interval_secs: u64,
+    },
+    /// Sync only when triggered manually via dashboard/API.
+    Manual,
+}
+
+fn default_batch_count() -> u32 {
+    3
+}
+
+fn default_batch_interval_secs() -> u64 {
+    300
+}
+
+/// Configuration for remote sync behavior.
+///
+/// Parsed from `[repo.ci]` section in repos.toml alongside CI config.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SyncConfig {
+    /// Whether remote sync is enabled. The serde default is unconditionally `true`; any gating on CI being enabled is not visible in this type.
+    #[serde(default = "default_sync_enabled")]
+    pub enabled: bool,
+    /// Sync strategy: eager, batched, or manual (`Eager` when the key is omitted).
+    #[serde(default)]
+    pub sync_strategy: SyncStrategy,
+    /// Whether to automatically rebase in-flight task branches (`true` when the key is omitted).
+    #[serde(default = "default_auto_rebase")]
+    pub auto_rebase: bool,
+    /// Whether to dispatch a rebase agent on conflict (`true` when the key is omitted).
+    #[serde(default = "default_dispatch_rebase_agent")]
+    pub dispatch_rebase_agent: bool,
+}
+
+fn default_sync_enabled() -> bool {
+    true
+}
+
+fn default_auto_rebase() -> bool {
+    true
+}
+
+fn default_dispatch_rebase_agent() -> bool {
+    true
+}
+
+impl Default for SyncConfig {
+    fn default() -> Self {
+        Self {
+            enabled: default_sync_enabled(),
+            sync_strategy: SyncStrategy::default(),
+            auto_rebase: default_auto_rebase(),
+            dispatch_rebase_agent: default_dispatch_rebase_agent(),
+        }
+    }
+}
+
+/// Result of rebasing a single branch onto updated main.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BranchRebaseResult {
+    /// Branch that was rebased.
+    pub branch: String,
+    /// Task associated with this branch (if any).
+    pub task_id: Option<TaskId>,
+    /// Whether the rebase succeeded without conflicts.
+    pub success: bool,
+    /// Whether conflicts were encountered.
+    pub had_conflicts: bool,
+    /// Whether a rebase agent was dispatched for conflict resolution.
+    pub agent_dispatched: bool,
+    /// New HEAD SHA after rebase (if successful).
+    pub new_head_sha: Option<String>,
+    /// Error message if the rebase failed.
+    pub error: Option<String>,
+}
+
+/// A sync point event: captures the result of syncing local with remote.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SyncPointRecord {
+    /// Unique identifier for this sync point.
+    pub id: String,
+    /// Repository that was synced.
+    pub repo: RepoName,
+    /// The commit SHA on remote main that we synced to.
+    pub remote_sha: String,
+    /// Previous local main SHA before sync.
+    pub previous_local_sha: String,
+    /// Whether the local main was fast-forwarded (no rebase needed).
+    pub fast_forward: bool,
+    /// Results of rebasing in-flight branches.
+    pub branch_results: Vec<BranchRebaseResult>,
+    /// How many branches were rebased successfully.
+    pub branches_rebased: u32,
+    /// How many branches had conflicts.
+    pub branches_conflicted: u32,
+    /// Timestamp of the sync.
+    pub synced_at: DateTime<Utc>,
+    /// What triggered the sync.
+    pub trigger: SyncTrigger,
+}
+
+/// What triggered a sync point (variant names serialize in lowercase).
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum SyncTrigger {
+    /// Automatic sync after a PR merge. Serde name is `prmerge`, whereas `Display` prints `pr-merge(#N)`.
+    PrMerge { pr_number: u64 },
+    /// Batched sync (multiple merges accumulated).
+    Batched { merge_count: u32 },
+    /// Manual sync triggered via API or dashboard.
+    Manual,
+    /// Sync on engine startup.
+    Startup,
+}
+
+impl std::fmt::Display for SyncStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SyncStrategy::Eager => write!(f, "eager"),
+            SyncStrategy::Batched {
+                batch_count,
+                interval_secs,
+            } => write!(f, "batched(n={batch_count}, interval={interval_secs}s)"),
+            SyncStrategy::Manual => write!(f, "manual"),
+        }
+    }
+}
+
+impl std::fmt::Display for SyncTrigger {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SyncTrigger::PrMerge { pr_number } => write!(f, "pr-merge(#{pr_number})"),
+            SyncTrigger::Batched { merge_count } => write!(f, "batched({merge_count} merges)"),
+            SyncTrigger::Manual => write!(f, "manual"),
+            SyncTrigger::Startup => write!(f, "startup"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sync_strategy_default_is_eager() {
+        assert_eq!(SyncStrategy::default(), SyncStrategy::Eager);
+    }
+
+    #[test]
+    fn sync_strategy_display() {
+        assert_eq!(SyncStrategy::Eager.to_string(), "eager");
+        assert_eq!(SyncStrategy::Manual.to_string(), "manual");
+        assert_eq!(
+            SyncStrategy::Batched {
+                batch_count: 5,
+                interval_secs: 600
+            }
+            .to_string(),
+            "batched(n=5, interval=600s)"
+        );
+    }
+
+    #[test]
+    fn sync_trigger_display() {
+        assert_eq!(
+            SyncTrigger::PrMerge { pr_number: 42 }.to_string(),
+            "pr-merge(#42)"
+        );
+        assert_eq!(
+            SyncTrigger::Batched { merge_count: 3 }.to_string(),
+            "batched(3 merges)"
+        );
+        assert_eq!(SyncTrigger::Manual.to_string(), "manual");
+        assert_eq!(SyncTrigger::Startup.to_string(), "startup");
+    }
+
+    #[test]
+    fn sync_config_defaults() {
+        let config = SyncConfig::default();
+        assert!(config.enabled);
+        assert_eq!(config.sync_strategy, SyncStrategy::Eager);
+        assert!(config.auto_rebase);
+        assert!(config.dispatch_rebase_agent);
+    }
+
+    #[test]
+    fn sync_strategy_serde_roundtrip() {
+        let strategies = vec![
+            SyncStrategy::Eager,
+            SyncStrategy::Manual,
+            SyncStrategy::Batched {
+                batch_count: 5,
+                interval_secs: 120,
+            },
+        ];
+        for strategy in strategies {
+            let json = serde_json::to_string(&strategy).unwrap();
+            let parsed: SyncStrategy = serde_json::from_str(&json).unwrap();
+            assert_eq!(strategy, parsed);
+        }
+    }
+
+    #[test]
+    fn sync_config_serde_roundtrip() {
+        let config = SyncConfig {
+            enabled: true,
+            sync_strategy: SyncStrategy::Batched {
+                batch_count: 2,
+                interval_secs: 60,
+            },
+            auto_rebase: false,
+            dispatch_rebase_agent: true,
+        };
+        let json = serde_json::to_string(&config).unwrap();
+        let parsed: SyncConfig = serde_json::from_str(&json).unwrap();
+        assert_eq!(parsed.enabled, config.enabled);
+        assert_eq!(parsed.auto_rebase, config.auto_rebase);
+        assert_eq!(parsed.sync_strategy, config.sync_strategy);
+    }
+
+    #[test]
+    fn branch_rebase_result_construction() {
+        let result = BranchRebaseResult {
+            branch: "auto/TASK-0001/loom/feature".into(),
+            task_id: Some(TaskId(1)),
+            success: true,
+            had_conflicts: false,
+            agent_dispatched: false,
+            new_head_sha: Some("abc123".into()),
+            error: None,
+        };
+        assert!(result.success);
+        assert!(!result.had_conflicts);
+    }
+
+    #[test]
+    fn sync_point_record_construction() {
+        let record = SyncPointRecord {
+            id: "sync-001".into(),
+            repo: RepoName::new("loom"),
+            remote_sha: "abc123".into(),
+            previous_local_sha: "def456".into(),
+            fast_forward: true,
+            branch_results: vec![],
+            branches_rebased: 0,
+            branches_conflicted: 0,
+            synced_at: Utc::now(),
+            trigger: SyncTrigger::PrMerge { pr_number: 42 },
+        };
+        assert!(record.fast_forward);
+        assert_eq!(record.branches_rebased, 0);
+    }
+}
diff --git a/crates/thrum-core/src/task.rs b/crates/thrum-core/src/task.rs
index cef33b2..e877061 100644
--- a/crates/thrum-core/src/task.rs
+++ b/crates/thrum-core/src/task.rs
@@ -1,4 +1,7 @@
+use crate::dependency::{BatchBarrier, TaskDependency};
 use crate::spec::Spec;
+use crate::trust::TrustAssessment;
+use crate::verification::TaggedCriterion;
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Deserializer, Serialize};
 use std::fmt;
@@ -52,8 +55,8 @@ impl std::str::FromStr for RepoName {
     }
 }
 
-// Re-export from safety module — single source of truth.
-pub use crate::safety::AsilLevel;
+// Re-export from repo module for backward compatibility.
+pub use crate::repo::AsilLevel;
 
 /// Gate report summarizing verification results.
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -93,6 +96,51 @@ pub struct CheckResult {
     pub stdout: String,
     pub stderr: String,
     pub exit_code: i32,
+    /// How long this individual check took, in seconds.
+    #[serde(default)]
+    pub duration_secs: f64,
+    /// Structured findings from the check (e.g., advisory count, mutation survival rate).
+    /// Empty for simple pass/fail checks.
+    #[serde(default)]
+    pub findings: Vec<CheckFinding>,
+}
+
+impl CheckResult {
+    /// Build a plain pass/fail check result: `duration_secs` is set to 0.0 and `findings` is left empty.
+    pub fn simple(
+        name: impl Into<String>,
+        passed: bool,
+        stdout: impl Into<String>,
+        stderr: impl Into<String>,
+        exit_code: i32,
+    ) -> Self {
+        Self {
+            name: name.into(),
+            passed,
+            stdout: stdout.into(),
+            stderr: stderr.into(),
+            exit_code,
+            duration_secs: 0.0,
+            findings: Vec::new(),
+        }
+    }
+}
+
+/// A structured finding from a gate check.
+///
+/// Instead of requiring humans to parse stdout, each check can report
+/// machine-readable findings for dashboard display and trend analysis.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CheckFinding {
+    /// Category of finding (e.g., "advisory", "license_violation", "mutation_survived").
+    pub category: String,
+    /// Severity: "error", "warning", "info".
+    pub severity: String,
+    /// Human-readable message.
+    pub message: String,
+    /// Optional numeric value (e.g., CVSS score, survival rate percentage).
+    #[serde(default)]
+    pub value: Option<f64>,
 }
 
 /// Summary shown at human review checkpoint.
@@ -102,6 +150,9 @@ pub struct CheckpointSummary {
     pub reviewer_output: String,
     pub gate1_report: GateReport,
     pub gate2_report: Option<GateReport>,
+    /// Trust boundary assessment for changed files.
+    #[serde(default)]
+    pub trust_assessment: Option<TrustAssessment>,
 }
 
 /// Task status as a state machine.
@@ -110,7 +161,9 @@ pub struct CheckpointSummary {
 ///   Pending -> Implementing -> Gate1Failed | Reviewing
 ///   Reviewing -> Gate2Failed | AwaitingApproval
 ///   AwaitingApproval -> Approved | Rejected
-///   Approved -> Integrating -> Gate3Failed | Merged
+///   Approved -> Integrating -> Gate3Failed | AwaitingCI | Merged
+///   AwaitingCI -> Merged | CIFailed
+///   CIFailed -> AwaitingCI (ci_fixer retry) | AwaitingApproval (escalation)
 ///   *Failed -> Implementing (retry)
 ///   Rejected -> Implementing (with feedback)
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -141,6 +194,29 @@ pub enum TaskStatus {
     Gate3Failed {
         report: GateReport,
     },
+    /// PR created, waiting for CI checks to pass.
+    AwaitingCI {
+        /// PR number (e.g. from `gh pr create`).
+        pr_number: u64,
+        /// Full PR URL for display.
+        pr_url: String,
+        /// Branch that the PR is on.
+        branch: String,
+        /// When the PR was created / CI polling started.
+        started_at: DateTime<Utc>,
+        /// How many times the ci_fixer agent has attempted to fix CI failures.
+        #[serde(default)]
+        ci_attempts: u32,
+    },
+    /// CI failed and the ci_fixer agent could not fix it within max retries.
+    CIFailed {
+        pr_number: u64,
+        pr_url: String,
+        /// Summary of the CI failure.
+        failure_summary: String,
+        /// Number of fix attempts made.
+        ci_attempts: u32,
+    },
     Merged {
         commit_sha: String,
     },
@@ -163,6 +239,8 @@ impl TaskStatus {
             TaskStatus::Approved => "approved",
             TaskStatus::Integrating => "integrating",
             TaskStatus::Gate3Failed { .. } => "gate3-failed",
+            TaskStatus::AwaitingCI { .. } => "awaiting-ci",
+            TaskStatus::CIFailed { .. } => "ci-failed",
             TaskStatus::Merged { .. } => "merged",
             TaskStatus::Rejected { .. } => "rejected",
         }
@@ -173,7 +251,10 @@ impl TaskStatus {
     }
 
     pub fn needs_human(&self) -> bool {
-        matches!(self, TaskStatus::AwaitingApproval { .. })
+        matches!(
+            self,
+            TaskStatus::AwaitingApproval { .. } | TaskStatus::CIFailed { .. }
+        )
     }
 
     /// Whether this task has a reviewable diff (in Reviewing or AwaitingApproval).
@@ -203,6 +284,34 @@ impl TaskStatus {
     pub fn is_claimable_approved(&self) -> bool {
         matches!(self, TaskStatus::Approved)
     }
+
+    /// Whether this task is awaiting CI results.
+    pub fn is_awaiting_ci(&self) -> bool {
+        matches!(self, TaskStatus::AwaitingCI { .. })
+    }
+
+    /// Borrow the report carried by `Gate1Failed`, `Gate2Failed` or `Gate3Failed`; `None` for every other status.
+    pub fn gate_report(&self) -> Option<&GateReport> {
+        match self {
+            TaskStatus::Gate1Failed { report }
+            | TaskStatus::Gate2Failed { report }
+            | TaskStatus::Gate3Failed { report } => Some(report),
+            _ => None,
+        }
+    }
+
+    /// Names of the checks with `passed == false` in the gate report, in report order; empty when the status carries no gate report.
+    pub fn failing_check_names(&self) -> Vec<&str> {
+        match self.gate_report() {
+            Some(report) => report
+                .checks
+                .iter()
+                .filter(|c| !c.passed)
+                .map(|c| c.name.as_str())
+                .collect(),
+            None => Vec::new(),
+        }
+    }
 }
 
 /// A task in the autonomous development pipeline.
@@ -226,6 +335,29 @@ pub struct Task {
     /// How many times this task has been retried after gate failure.
     #[serde(default)]
     pub retry_count: u32,
+    /// History of gate failure reports across retry cycles.
+    ///
+    /// Each time a gate fails and the task is retried, the report is
+    /// appended here for comparison and convergence tracking.
+    #[serde(default)]
+    pub gate_history: Vec<GateReport>,
+    /// Verification-tagged acceptance criteria with tracked results.
+    ///
+    /// Populated from `acceptance_criteria` during pre-dispatch audit.
+    /// Each criterion has a verification tag (TEST, LINT, BENCH, etc.)
+    /// and accumulates verification results as gates run.
+    #[serde(default)]
+    pub tagged_criteria: Vec<TaggedCriterion>,
+    /// Tasks that must complete (reach Merged status) before this task
+    /// can be dispatched. Supports both hard (blocking) and soft (advisory)
+    /// dependency kinds.
+    #[serde(default)]
+    pub depends_on: Vec<TaskDependency>,
+    /// Optional batch barrier for grouping tasks into ordered waves.
+    /// All tasks sharing a batch barrier must complete before the next
+    /// batch is dispatched. Enables post-merge compilation checks between batches.
+    #[serde(default)]
+    pub batch_barrier: Option<BatchBarrier>,
     pub created_at: DateTime<Utc>,
     pub updated_at: DateTime<Utc>,
 }
@@ -246,6 +378,10 @@ impl Task {
             context_id: None,
             spec: None,
             retry_count: 0,
+            gate_history: Vec::new(),
+            tagged_criteria: Vec::new(),
+            depends_on: Vec::new(),
+            batch_barrier: None,
             created_at: now,
             updated_at: now,
         }
@@ -256,6 +392,19 @@ impl Task {
         self.retry_count < MAX_RETRIES
     }
 
+    /// Whether this task declares any dependencies at all. Entries of every kind count, soft ones included.
+    pub fn has_dependencies(&self) -> bool {
+        !self.depends_on.is_empty()
+    }
+
+    /// True when every dependency is either `SoftOrder` or has its prerequisite id in `completed` (also true when there are no dependencies).
+    pub fn dependencies_satisfied(&self, completed: &std::collections::HashSet<i64>) -> bool {
+        use crate::dependency::DependencyKind;
+        self.depends_on.iter().all(|dep| {
+            dep.kind == DependencyKind::SoftOrder || completed.contains(&dep.prerequisite.0)
+        })
+    }
+
     /// Branch name for this task's implementation.
     pub fn branch_name(&self) -> String {
         let slug: String = self
@@ -383,6 +532,7 @@ mod tests {
                         duration_secs: 0.0,
                     },
                     gate2_report: None,
+                    trust_assessment: None,
                 }
             }
             .needs_human()
@@ -400,4 +550,106 @@ mod tests {
             RepoName::new("custom-project")
         );
     }
+
+    #[test]
+    fn gate_report_extracts_from_gate_failed_status() {
+        let report = GateReport {
+            level: GateLevel::Quality,
+            checks: vec![
+                CheckResult {
+                    name: "cargo_fmt".into(),
+                    passed: true,
+                    stdout: String::new(),
+                    stderr: String::new(),
+                    exit_code: 0,
+                    duration_secs: 0.0,
+                    findings: Vec::new(),
+                },
+                CheckResult {
+                    name: "cargo_clippy".into(),
+                    passed: false,
+                    stdout: String::new(),
+                    stderr: "error: unused variable".into(),
+                    exit_code: 1,
+                    duration_secs: 0.0,
+                    findings: Vec::new(),
+                },
+            ],
+            passed: false,
+            duration_secs: 5.0,
+        };
+        let status = TaskStatus::Gate1Failed {
+            report: report.clone(),
+        };
+        assert!(status.gate_report().is_some());
+        assert_eq!(status.gate_report().unwrap().checks.len(), 2);
+        assert!(!status.gate_report().unwrap().passed);
+    }
+
+    #[test]
+    fn gate_report_returns_none_for_non_failed_status() {
+        assert!(TaskStatus::Pending.gate_report().is_none());
+        assert!(TaskStatus::Approved.gate_report().is_none());
+        assert!(
+            TaskStatus::Implementing {
+                branch: "test".into(),
+                started_at: Utc::now()
+            }
+            .gate_report()
+            .is_none()
+        );
+    }
+
+    #[test]
+    fn failing_check_names_returns_failed_checks() {
+        let status = TaskStatus::Gate1Failed {
+            report: GateReport {
+                level: GateLevel::Quality,
+                checks: vec![
+                    CheckResult {
+                        name: "cargo_fmt".into(),
+                        passed: true,
+                        stdout: String::new(),
+                        stderr: String::new(),
+                        exit_code: 0,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    },
+                    CheckResult {
+                        name: "cargo_clippy".into(),
+                        passed: false,
+                        stdout: String::new(),
+                        stderr: "error".into(),
+                        exit_code: 1,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    },
+                    CheckResult {
+                        name: "cargo_test".into(),
+                        passed: false,
+                        stdout: String::new(),
+                        stderr: "test failed".into(),
+                        exit_code: 1,
+                        duration_secs: 0.0,
+                        findings: Vec::new(),
+                    },
+                ],
+                passed: false,
+                duration_secs: 10.0,
+            },
+        };
+        let names = status.failing_check_names();
+        assert_eq!(names, vec!["cargo_clippy", "cargo_test"]);
+    }
+
+    #[test]
+    fn failing_check_names_empty_for_non_failed() {
+        assert!(TaskStatus::Pending.failing_check_names().is_empty());
+    }
+
+    #[test]
+    fn gate_history_preserved_on_new_task() {
+        let task = Task::new(RepoName::new("test"), "title".into(), "desc".into());
+        assert!(task.gate_history.is_empty());
+    }
 }
diff --git a/crates/thrum-core/src/telemetry.rs b/crates/thrum-core/src/telemetry.rs
index b04c8be..2a5fff2 100644
--- a/crates/thrum-core/src/telemetry.rs
+++ b/crates/thrum-core/src/telemetry.rs
@@ -299,6 +299,9 @@ pub struct TraceFilter {
     pub target_prefix: Option<String>,
     /// Filter by field key=value (e.g., "task.id=42").
     pub field_filter: Option<(String, String)>,
+    /// When true, only include pipeline-meaningful events (gate results,
+    /// state transitions, errors) and filter out generic infrastructure noise.
+    pub pipeline_only: bool,
 }
 
 impl TraceFilter {
@@ -333,10 +336,145 @@ impl TraceFilter {
                 return false;
             }
         }
+        if self.pipeline_only && !is_pipeline_event(event) {
+            return false;
+        }
         true
     }
 }
 
+/// Decide whether a stored trace event carries pipeline-level meaning (gate
+/// results, state transitions, errors, warnings) rather than generic
+/// infrastructure chatter (config loading, CLI invocations, debug output).
+///
+/// An event qualifies as soon as one of these holds, tested in this order:
+/// - its level upper-cases to ERROR or WARN
+/// - `fields` is a JSON object containing one of the pipeline attribute keys
+/// - its message contains a known pipeline phrase (case-insensitive)
+/// - its target contains `engine::pipeline`, `gate` or `convergence`
+///
+/// Phrase and target checks are plain substring tests, so any message that
+/// merely contains e.g. "gate" qualifies.
+pub fn is_pipeline_event(event: &StoredTraceEvent) -> bool {
+    // Lower-case phrases whose presence in the message marks a pipeline event.
+    static PIPELINE_PATTERNS: &[&str] = &[
+        "gate",
+        "state transition",
+        "task failed",
+        "task passed",
+        "task merged",
+        "task approved",
+        "task rejected",
+        "retry",
+        "convergence",
+        "budget",
+        "agent started",
+        "agent finished",
+        "checkpoint",
+        "approval",
+        "integration",
+        "ci polling",
+        "ci passed",
+        "ci failed",
+        "ci fix",
+        "ci escalated",
+        "pr #",
+        "rebase",
+        "sync started",
+        "sync completed",
+        "sync failed",
+    ];
+
+    // Structured field keys that mark an event as pipeline-related.
+    let pipeline_fields = [
+        attrs::TASK_ID,
+        attrs::GATE_LEVEL,
+        attrs::GATE_PASSED,
+        attrs::PIPELINE_STAGE,
+        attrs::CHECK_NAME,
+        attrs::CHECK_PASSED,
+        attrs::REQUIREMENT_ID,
+        attrs::GIT_COMMIT,
+    ];
+
+    // Errors and warnings always count, whatever else the event carries.
+    let severe = event.level.as_ref().is_some_and(|level| {
+        let upper = level.to_uppercase();
+        upper == "ERROR" || upper == "WARN"
+    });
+    if severe {
+        return true;
+    }
+
+    // Any pipeline attribute key on an object-valued `fields` is enough.
+    let has_pipeline_field = event
+        .fields
+        .as_object()
+        .is_some_and(|map| pipeline_fields.iter().any(|key| map.contains_key(*key)));
+    if has_pipeline_field {
+        return true;
+    }
+
+    // Prefer the dedicated message; fall back to a string "message" entry in
+    // `fields`, and to the empty string when neither is present.
+    let msg_lower = event
+        .message
+        .as_deref()
+        .or_else(|| event.fields.get("message").and_then(|v| v.as_str()))
+        .unwrap_or("")
+        .to_lowercase();
+    if PIPELINE_PATTERNS.iter().any(|pattern| msg_lower.contains(pattern)) {
+        return true;
+    }
+
+    // Last resort: the emitting module is itself pipeline-specific. The target
+    // is lower-cased first, so the comparison ignores case.
+    event.target.as_ref().is_some_and(|target| {
+        let target_lower = target.to_lowercase();
+        ["engine::pipeline", "gate", "convergence"]
+            .iter()
+            .any(|needle| target_lower.contains(needle))
+    })
+}
+
+/// Decide whether an engine log message should be kept when filtering the SSE
+/// stream down to pipeline-level events.
+///
+/// A message is dropped only if it contains (case-insensitively) one of the
+/// infrastructure phrases listed below, such as "loaded pipeline config" or
+/// "invoking claude CLI". Everything else is kept.
+///
+/// Returns `false` for noise and `true` otherwise, including for an empty
+/// message.
+pub fn is_pipeline_log_message(message: &str) -> bool {
+    // Lower-case infrastructure phrases that mark a message as noise.
+    static INFRA_NOISE: &[&str] = &[
+        "loaded pipeline config",
+        "loaded repos config",
+        "invoking claude cli",
+        "spawning subprocess",
+        "reading config",
+        "initializing",
+        "starting http server",
+        "listening on",
+        "connected to",
+        "loading plugin",
+        "registering handler",
+        "parsing",
+        "compiling",
+        "opening database",
+        "trace directory",
+    ];
+
+    // Lower-case the message once so the comparison ignores case.
+    let msg_lower = message.to_lowercase();
+
+    // Keep by default: EngineLog events are already curated by the engine, so
+    // only an explicit noise match removes a message.
+    let is_noise = INFRA_NOISE.iter().any(|noise| msg_lower.contains(noise));
+    !is_noise
+}
+
 /// Summary info about stored traces.
 #[derive(Debug)]
 pub struct TraceSummary {
@@ -477,4 +615,173 @@ mod tests {
         assert!(display.contains("invoking claude CLI"));
         assert!(display.contains("prompt_len"));
     }
+
+    // ── Pipeline Event Filter Tests ─────────────────────────────────────
+
+    #[test]
+    fn pipeline_filter_passes_errors() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("ERROR".into()),
+            message: Some("something broke".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_passes_warnings() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("WARN".into()),
+            message: Some("approaching budget limit".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_passes_gate_events() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("running checks".into()),
+            fields: serde_json::json!({"gate.level": "quality", "gate.passed": true}),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_passes_task_id_events() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("processing".into()),
+            fields: serde_json::json!({"task.id": "TASK-0042"}),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_passes_state_transition_message() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("state transition: pending -> implementing".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_rejects_infra_noise() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("loaded repos config from configs/repos.toml".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: Some("thrum_cli::config".into()),
+            span: None,
+            spans: None,
+        };
+        assert!(!is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_rejects_generic_info() {
+        // A generic INFO event with no pipeline-specific content
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("opening database at thrum.redb".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: Some("thrum_db".into()),
+            span: None,
+            spans: None,
+        };
+        assert!(!is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_passes_gate_target() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("running cargo test".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: Some("thrum_runner::gate".into()),
+            span: None,
+            spans: None,
+        };
+        assert!(is_pipeline_event(&event));
+    }
+
+    #[test]
+    fn pipeline_filter_with_trace_filter() {
+        let event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("reading config file".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: Some("thrum_cli".into()),
+            span: None,
+            spans: None,
+        };
+        let filter = TraceFilter {
+            pipeline_only: true,
+            ..Default::default()
+        };
+        assert!(!filter.matches(&event));
+
+        let gate_event = StoredTraceEvent {
+            timestamp: None,
+            level: Some("INFO".into()),
+            message: Some("gate quality passed".into()),
+            fields: serde_json::Value::Object(Default::default()),
+            target: None,
+            span: None,
+            spans: None,
+        };
+        assert!(filter.matches(&gate_event));
+    }
+
+    // ── Pipeline Log Message Filter Tests ───────────────────────────────
+
+    #[test]
+    fn pipeline_log_rejects_infra_noise() {
+        assert!(!is_pipeline_log_message(
+            "loaded pipeline config from configs/pipeline.toml"
+        ));
+        assert!(!is_pipeline_log_message(
+            "invoking claude CLI for TASK-0001"
+        ));
+        assert!(!is_pipeline_log_message("spawning subprocess: cargo test"));
+        assert!(!is_pipeline_log_message(
+            "starting http server on 0.0.0.0:3000"
+        ));
+        assert!(!is_pipeline_log_message("opening database at thrum.redb"));
+    }
+
+    #[test]
+    fn pipeline_log_passes_meaningful_messages() {
+        assert!(is_pipeline_log_message("task TASK-0001 failed gate 1"));
+        assert!(is_pipeline_log_message("budget exhausted, pausing engine"));
+        assert!(is_pipeline_log_message("approaching budget ceiling"));
+        assert!(is_pipeline_log_message("agent-1 started on TASK-0001"));
+    }
 }
diff --git a/crates/thrum-core/src/traceability.rs b/crates/thrum-core/src/traceability.rs
index 9bf9be3..fa272a0 100644
--- a/crates/thrum-core/src/traceability.rs
+++ b/crates/thrum-core/src/traceability.rs
@@ -49,8 +49,6 @@ pub enum TraceArtifact {
     },
 }
 
-// TCL is defined in crate::safety::Tcl — use that for tool classification.
-
 /// Full traceability matrix for a release.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct TraceabilityMatrix {
@@ -70,6 +68,70 @@ pub struct TraceMatrixEntry {
 }
 
 impl TraceabilityMatrix {
+    /// Assemble a traceability matrix out of individual trace records.
+    ///
+    /// Records are bucketed by `requirement_id` and each bucket is folded, in
+    /// input order, into one entry; entries come out sorted by requirement id.
+    /// `tool` is set to "thrum" and `version` to this crate's
+    /// `CARGO_PKG_VERSION`.
+    pub fn from_records(records: &[TraceRecord]) -> Self {
+        use std::collections::BTreeMap;
+
+        // An ordered map hands the buckets back already sorted by requirement id.
+        let mut grouped: BTreeMap<String, Vec<&TraceRecord>> = BTreeMap::new();
+        for record in records {
+            grouped
+                .entry(record.requirement_id.clone())
+                .or_default()
+                .push(record);
+        }
+
+        let mut entries = Vec::with_capacity(grouped.len());
+        for (requirement_id, bucket) in grouped {
+            let mut entry = TraceMatrixEntry {
+                requirement_id,
+                design: None,
+                implementation_commit: None,
+                test_status: None,
+                proof_status: None,
+                review_status: None,
+            };
+            for record in bucket {
+                match &record.artifact {
+                    // Design and review: the last record in input order wins.
+                    TraceArtifact::Design { rationale } => {
+                        entry.design = Some(rationale.clone());
+                    }
+                    TraceArtifact::Review { approved, .. } => {
+                        entry.review_status = Some(*approved);
+                    }
+                    // A record without a commit SHA is stored as an empty string.
+                    TraceArtifact::Implementation { commit_sha, .. } => {
+                        entry.implementation_commit =
+                            commit_sha.clone().or_else(|| Some(String::new()));
+                    }
+                    // Tests and proofs accumulate with logical AND: the status
+                    // is true only if every record seen so far passed.
+                    TraceArtifact::Test { passed, .. } => {
+                        entry.test_status = Some(entry.test_status.unwrap_or(true) && *passed);
+                    }
+                    TraceArtifact::Proof { passed, .. } => {
+                        entry.proof_status = Some(entry.proof_status.unwrap_or(true) && *passed);
+                    }
+                    // Other artifact kinds do not feed a matrix column.
+                    _ => {}
+                }
+            }
+            entries.push(entry);
+        }
+
+        Self {
+            tool: "thrum".to_string(),
+            version: env!("CARGO_PKG_VERSION").to_string(),
+            entries,
+        }
+    }
+
     /// Export as CSV (for certification documentation).
     pub fn to_csv(&self) -> String {
         let mut out = String::from("requirement_id,design,implementation,test,proof,review\n");
@@ -87,3 +149,168 @@ impl TraceabilityMatrix {
         out
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_record(id: i64, task_id: i64, req_id: &str, artifact: TraceArtifact) -> TraceRecord {
+        TraceRecord {
+            id,
+            task_id,
+            requirement_id: req_id.to_string(),
+            artifact,
+            created_at: chrono::Utc::now(),
+        }
+    }
+
+    #[test]
+    fn matrix_from_empty_records() {
+        let matrix = TraceabilityMatrix::from_records(&[]);
+        assert!(matrix.entries.is_empty());
+        assert_eq!(matrix.tool, "thrum");
+    }
+
+    #[test]
+    fn matrix_groups_by_requirement() {
+        let records = vec![
+            make_record(
+                1,
+                1,
+                "REQ-001",
+                TraceArtifact::Requirement {
+                    title: "Req 1".into(),
+                    description: "Desc".into(),
+                },
+            ),
+            make_record(
+                2,
+                1,
+                "REQ-001",
+                TraceArtifact::Implementation {
+                    branch: "auto/TASK-0001".into(),
+                    commit_sha: Some("abc123".into()),
+                    files_changed: vec!["src/lib.rs".into()],
+                },
+            ),
+            make_record(
+                3,
+                1,
+                "REQ-001",
+                TraceArtifact::Test {
+                    gate_level: "Quality".into(),
+                    passed: true,
+                    report_json: "{}".into(),
+                },
+            ),
+            make_record(
+                4,
+                2,
+                "REQ-002",
+                TraceArtifact::Design {
+                    rationale: "Design rationale".into(),
+                },
+            ),
+        ];
+
+        let matrix = TraceabilityMatrix::from_records(&records);
+        assert_eq!(matrix.entries.len(), 2);
+
+        let req001 = matrix
+            .entries
+            .iter()
+            .find(|e| e.requirement_id == "REQ-001")
+            .unwrap();
+        assert_eq!(req001.implementation_commit, Some("abc123".into()));
+        assert_eq!(req001.test_status, Some(true));
+        assert!(req001.proof_status.is_none());
+
+        let req002 = matrix
+            .entries
+            .iter()
+            .find(|e| e.requirement_id == "REQ-002")
+            .unwrap();
+        assert_eq!(req002.design, Some("Design rationale".into()));
+    }
+
+    #[test]
+    fn matrix_test_failure_overrides_success() {
+        let records = vec![
+            make_record(
+                1,
+                1,
+                "REQ-001",
+                TraceArtifact::Test {
+                    gate_level: "Quality".into(),
+                    passed: true,
+                    report_json: "{}".into(),
+                },
+            ),
+            make_record(
+                2,
+                1,
+                "REQ-001",
+                TraceArtifact::Test {
+                    gate_level: "Integration".into(),
+                    passed: false,
+                    report_json: "{}".into(),
+                },
+            ),
+        ];
+
+        let matrix = TraceabilityMatrix::from_records(&records);
+        assert_eq!(matrix.entries.len(), 1);
+        // A failure should override the previous success
+        assert_eq!(matrix.entries[0].test_status, Some(false));
+    }
+
+    #[test]
+    fn matrix_review_status() {
+        let records = vec![make_record(
+            1,
+            1,
+            "REQ-001",
+            TraceArtifact::Review {
+                reviewer: "claude".into(),
+                approved: true,
+                comments: "LGTM".into(),
+            },
+        )];
+
+        let matrix = TraceabilityMatrix::from_records(&records);
+        assert_eq!(matrix.entries[0].review_status, Some(true));
+    }
+
+    #[test]
+    fn matrix_csv_export() {
+        let records = vec![
+            make_record(
+                1,
+                1,
+                "REQ-001",
+                TraceArtifact::Test {
+                    gate_level: "Quality".into(),
+                    passed: true,
+                    report_json: "{}".into(),
+                },
+            ),
+            make_record(
+                2,
+                1,
+                "REQ-001",
+                TraceArtifact::Review {
+                    reviewer: "claude".into(),
+                    approved: false,
+                    comments: "needs work".into(),
+                },
+            ),
+        ];
+
+        let matrix = TraceabilityMatrix::from_records(&records);
+        let csv = matrix.to_csv();
+        assert!(csv.contains("requirement_id,design,implementation,test,proof,review"));
+        assert!(csv.contains("REQ-001"));
+        assert!(csv.contains("true"));
+        assert!(csv.contains("false"));
+    }
+}
diff --git a/crates/thrum-core/src/trust.rs b/crates/thrum-core/src/trust.rs
new file mode 100644
index 0000000..4ac73bc
--- /dev/null
+++ b/crates/thrum-core/src/trust.rs
@@ -0,0 +1,531 @@
+//! Trust boundaries and risk stratification for file-level access control.
+//!
+//! Repos can declare which files are high-risk, security-sensitive, or safe
+//! for auto-approval via glob patterns in `[repo.trust]`. The pipeline uses
+//! these classifications to:
+//!
+//! - Block auto-approval of changes touching high-risk files
+//! - Trigger extra security checks (cargo-audit, cargo-deny) for security-sensitive files
+//! - Allow fast-path approval for changes confined to auto-ok paths
+
+use serde::{Deserialize, Serialize};
+
+/// Trust boundary configuration for a repository.
+///
+/// Parsed from the `[repo.trust]` section in repos.toml. Every field is a list
+/// of glob patterns (`*` stays within one path segment, `**` spans segments)
+/// matched against file paths relative to the repo root; omitted fields are empty.
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+pub struct TrustConfig {
+    /// Glob patterns for files that MUST go through human review.
+    /// Changes touching these files cannot be auto-approved.
+    /// Example: `["src/crypto/**", "src/safety/**", "Cargo.lock"]`
+    #[serde(default)]
+    pub high_risk: Vec<String>,
+
+    /// Glob patterns for files that trigger extra security checks
+    /// (cargo-audit, cargo-deny) but can still be auto-approved.
+    /// Example: `["Cargo.toml", "build.rs", ".github/**"]`
+    #[serde(default)]
+    pub security_sensitive: Vec<String>,
+
+    /// Glob patterns for files safe to auto-approve without extra checks.
+    /// Example: `["docs/**", "README.md", "*.md"]`
+    #[serde(default)]
+    pub auto_ok: Vec<String>,
+}
+
+/// Risk level classification for a file or change set.
+///
+/// Variants are declared from lowest to highest risk and the derived `Ord`
+/// follows that order; a changeset's overall risk is the maximum over its files.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
+pub enum RiskLevel {
+    /// Safe for automatic approval.
+    AutoOk,
+    /// Default — no special classification.
+    Standard,
+    /// Triggers extra security scanning.
+    SecuritySensitive,
+    /// Requires mandatory human review.
+    HighRisk,
+}
+
+impl std::fmt::Display for RiskLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            RiskLevel::AutoOk => "auto-ok",
+            RiskLevel::Standard => "standard",
+            RiskLevel::SecuritySensitive => "security-sensitive",
+            RiskLevel::HighRisk => "high-risk",
+        })
+    }
+}
+
+/// Assessment of trust boundaries for a set of changed files.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TrustAssessment {
+    /// Highest risk level among the changed files (`Standard` if there are none).
+    pub overall_risk: RiskLevel,
+    /// Per-file risk classification, in the order the files were supplied.
+    pub file_risks: Vec<(String, RiskLevel)>,
+    /// True if any file is HighRisk — blocks auto-approval.
+    pub requires_human_review: bool,
+    /// True if any file is SecuritySensitive or HighRisk — triggers extra checks.
+    pub triggers_security_checks: bool,
+}
+
+impl TrustConfig {
+    /// Map one repo-relative path to its risk level.
+    ///
+    /// The pattern lists are consulted from most to least severe (`high_risk`,
+    /// `security_sensitive`, `auto_ok`); the first list containing a matching
+    /// pattern decides, and a path matching none of them is `Standard`.
+    /// Individual patterns are tested with `glob_match`.
+    pub fn classify(&self, path: &str) -> RiskLevel {
+        // Ordered from most to least severe so the strictest match wins.
+        let tiers = [
+            (&self.high_risk, RiskLevel::HighRisk),
+            (&self.security_sensitive, RiskLevel::SecuritySensitive),
+            (&self.auto_ok, RiskLevel::AutoOk),
+        ];
+
+        tiers
+            .iter()
+            .find(|(patterns, _)| patterns.iter().any(|p| glob_match(p, path)))
+            .map_or(RiskLevel::Standard, |(_, level)| *level)
+    }
+
+    /// Classify every changed file and summarise the changeset.
+    ///
+    /// `file_risks` keeps the input order. `overall_risk` is the maximum
+    /// per-file level, or `Standard` when `changed_files` is empty.
+    ///
+    /// `requires_human_review` is set when any file is `HighRisk`, and
+    /// `triggers_security_checks` when any file is `SecuritySensitive` or above.
+    pub fn assess(&self, changed_files: &[String]) -> TrustAssessment {
+        let mut file_risks = Vec::with_capacity(changed_files.len());
+        let mut highest: Option<RiskLevel> = None;
+        let mut requires_human_review = false;
+        let mut triggers_security_checks = false;
+
+        // Single pass: classify each file once and update every aggregate.
+        for file in changed_files {
+            let risk = self.classify(file);
+            // `None` orders below every `Some(_)`, so the first file replaces it.
+            highest = highest.max(Some(risk));
+            requires_human_review |= risk == RiskLevel::HighRisk;
+            triggers_security_checks |= risk >= RiskLevel::SecuritySensitive;
+            file_risks.push((file.clone(), risk));
+        }
+
+        TrustAssessment {
+            // An empty changeset has no maximum; report it as `Standard`.
+            overall_risk: highest.unwrap_or(RiskLevel::Standard),
+            file_risks,
+            requires_human_review,
+            triggers_security_checks,
+        }
+    }
+}
+
+/// Simple glob matching supporting `*` and `**` patterns.
+///
+/// - `*` matches any sequence of non-`/` characters
+/// - `**` matches any sequence of characters including `/`
+/// - Patterns without wildcards must equal the path exactly
+fn glob_match(pattern: &str, path: &str) -> bool {
+    // Try `tail` at every segment boundary so `**/a/b.rs` matches `x/a/b.rs`.
+    let tail_matches = |tail: &str, text: &str| {
+        glob_simple_match(tail, text)
+            || text
+                .match_indices('/')
+                .any(|(i, _)| glob_simple_match(tail, &text[i + 1..]))
+    };
+
+    if pattern.contains("**") {
+        let parts: Vec<&str> = pattern.split("**").collect();
+        if parts.len() == 2 {
+            let prefix = parts[0].trim_end_matches('/');
+            let suffix = parts[1].trim_start_matches('/');
+            if prefix.is_empty() && suffix.is_empty() {
+                return true; // "**" matches everything
+            }
+            if prefix.is_empty() {
+                return tail_matches(suffix, path);
+            }
+            // `prefix` must be followed by `/` or by the end of the path.
+            let after_prefix = match path.strip_prefix(prefix) {
+                Some(rest) => rest,
+                None => return false,
+            };
+            if suffix.is_empty() {
+                return after_prefix.is_empty() || after_prefix.starts_with('/');
+            }
+            // prefix/**/suffix: something below prefix/ must match suffix
+            return match after_prefix.strip_prefix('/') {
+                Some(rest) => tail_matches(suffix, rest),
+                None => false,
+            };
+        }
+        // More than one `**`: only single-`*` matching is attempted
+        glob_simple_match(pattern, path)
+    } else if pattern.contains('*') {
+        glob_simple_match(pattern, path)
+    } else {
+        // No wildcards: exact match only
+        path == pattern
+    }
+}
+
+/// Match `text` against a pattern in which only the first `*` is a wildcard.
+///
+/// That `*` matches any run of non-`/` characters, including none; the text
+/// before and after it must match literally (any further `*` is compared as
+/// a literal character). A pattern without `*` must equal `text` exactly.
+///
+/// Returns `true` when `text` matches.
+fn glob_simple_match(pattern: &str, text: &str) -> bool {
+    let (prefix, suffix) = match pattern.split_once('*') {
+        Some(halves) => halves,
+        None => return text == pattern,
+    };
+
+    // Peel the literal prefix off the front and the literal suffix off the
+    // END of what remains. Anchoring the suffix at the end, rather than at
+    // its first occurrence, is what lets `*.md` match `a.md.md`.
+    let absorbed = text
+        .strip_prefix(prefix)
+        .and_then(|rest| rest.strip_suffix(suffix));
+
+    // Whatever the `*` absorbed must stay within a single path segment.
+    match absorbed {
+        Some(segment) => !segment.contains('/'),
+        None => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn test_config() -> TrustConfig {
+        TrustConfig {
+            high_risk: vec![
+                "src/crypto/**".into(),
+                "src/safety/**".into(),
+                "Cargo.lock".into(),
+            ],
+            security_sensitive: vec!["Cargo.toml".into(), "build.rs".into(), ".github/**".into()],
+            auto_ok: vec!["docs/**".into(), "*.md".into(), "README.md".into()],
+        }
+    }
+
+    #[test]
+    fn classify_high_risk_file() {
+        let config = test_config();
+        assert_eq!(config.classify("src/crypto/aes.rs"), RiskLevel::HighRisk);
+        assert_eq!(config.classify("src/safety/asil.rs"), RiskLevel::HighRisk);
+        assert_eq!(config.classify("Cargo.lock"), RiskLevel::HighRisk);
+    }
+
+    #[test]
+    fn classify_security_sensitive_file() {
+        let config = test_config();
+        assert_eq!(config.classify("Cargo.toml"), RiskLevel::SecuritySensitive);
+        assert_eq!(config.classify("build.rs"), RiskLevel::SecuritySensitive);
+        assert_eq!(
+            config.classify(".github/workflows/ci.yml"),
+            RiskLevel::SecuritySensitive
+        );
+    }
+
+    #[test]
+    fn classify_auto_ok_file() {
+        let config = test_config();
+        assert_eq!(config.classify("docs/guide.md"), RiskLevel::AutoOk);
+        assert_eq!(config.classify("README.md"), RiskLevel::AutoOk);
+        assert_eq!(config.classify("CHANGELOG.md"), RiskLevel::AutoOk);
+    }
+
+    #[test]
+    fn classify_standard_file() {
+        let config = test_config();
+        assert_eq!(config.classify("src/main.rs"), RiskLevel::Standard);
+        assert_eq!(config.classify("tests/integration.rs"), RiskLevel::Standard);
+    }
+
+    #[test]
+    fn high_risk_takes_priority() {
+        // A file matching both high_risk and security_sensitive
+        let config = TrustConfig {
+            high_risk: vec!["Cargo.*".into()],
+            security_sensitive: vec!["Cargo.toml".into()],
+            auto_ok: vec![],
+        };
+        assert_eq!(config.classify("Cargo.toml"), RiskLevel::HighRisk);
+    }
+
+    #[test]
+    fn assess_mixed_changeset() {
+        let config = test_config();
+        let files = vec![
+            "src/main.rs".into(),
+            "docs/readme.md".into(),
+            "src/crypto/key.rs".into(),
+        ];
+        let assessment = config.assess(&files);
+        assert_eq!(assessment.overall_risk, RiskLevel::HighRisk);
+        assert!(assessment.requires_human_review);
+        assert!(assessment.triggers_security_checks);
+    }
+
+    #[test]
+    fn assess_auto_ok_only() {
+        let config = test_config();
+        let files = vec!["docs/api.md".into(), "README.md".into()];
+        let assessment = config.assess(&files);
+        assert_eq!(assessment.overall_risk, RiskLevel::AutoOk);
+        assert!(!assessment.requires_human_review);
+        assert!(!assessment.triggers_security_checks);
+    }
+
+    #[test]
+    fn assess_security_sensitive_triggers_checks() {
+        let config = test_config();
+        let files = vec!["Cargo.toml".into(), "src/lib.rs".into()];
+        let assessment = config.assess(&files);
+        assert_eq!(assessment.overall_risk, RiskLevel::SecuritySensitive);
+        assert!(!assessment.requires_human_review);
+        assert!(assessment.triggers_security_checks);
+    }
+
+    #[test]
+    fn assess_empty_changeset() {
+        let config = test_config();
+        let assessment = config.assess(&[]);
+        assert_eq!(assessment.overall_risk, RiskLevel::Standard);
+        assert!(!assessment.requires_human_review);
+    }
+
+    #[test]
+    fn glob_double_star_prefix() {
+        assert!(glob_match("src/crypto/**", "src/crypto/aes.rs"));
+        assert!(glob_match("src/crypto/**", "src/crypto/deep/nested.rs"));
+        assert!(!glob_match("src/crypto/**", "src/other/file.rs"));
+    }
+
+    #[test]
+    fn glob_single_star() {
+        assert!(glob_match("*.md", "README.md"));
+        assert!(glob_match("*.md", "CHANGELOG.md"));
+        assert!(!glob_match("*.md", "docs/guide.md")); // * doesn't cross /
+    }
+
+    #[test]
+    fn glob_exact_match() {
+        assert!(glob_match("Cargo.lock", "Cargo.lock"));
+        assert!(!glob_match("Cargo.lock", "other/Cargo.lock"));
+    }
+
+    #[test]
+    fn glob_suffix_match_for_filenames() {
+        assert!(glob_match("build.rs", "build.rs"));
+    }
+
+    #[test]
+    fn risk_level_ordering() {
+        assert!(RiskLevel::AutoOk < RiskLevel::Standard);
+        assert!(RiskLevel::Standard < RiskLevel::SecuritySensitive);
+        assert!(RiskLevel::SecuritySensitive < RiskLevel::HighRisk);
+    }
+
+    #[test]
+    fn risk_level_display() {
+        assert_eq!(RiskLevel::AutoOk.to_string(), "auto-ok");
+        assert_eq!(RiskLevel::HighRisk.to_string(), "high-risk");
+        assert_eq!(
+            RiskLevel::SecuritySensitive.to_string(),
+            "security-sensitive"
+        );
+        assert_eq!(RiskLevel::Standard.to_string(), "standard");
+    }
+
+    /// The default config has no patterns, so every path is `Standard`.
+    #[test]
+    fn trust_config_default_is_empty() {
+        let empty = TrustConfig::default();
+        assert!(empty.auto_ok.is_empty());
+        assert!(empty.security_sensitive.is_empty());
+        assert!(empty.high_risk.is_empty());
+        // With nothing configured, classification falls back to Standard.
+        let level = empty.classify("anything.rs");
+        assert_eq!(level, RiskLevel::Standard);
+    }
+
+    /// All three pattern lists are read from TOML and drive classification.
+    #[test]
+    fn trust_config_from_toml() {
+        let parsed: TrustConfig = toml::from_str(
+            r#"
+            high_risk = ["src/crypto/**"]
+            security_sensitive = ["Cargo.toml"]
+            auto_ok = ["docs/**"]
+        "#,
+        )
+        .unwrap();
+        let list_sizes = (
+            parsed.high_risk.len(),
+            parsed.security_sensitive.len(),
+            parsed.auto_ok.len(),
+        );
+        assert_eq!(list_sizes, (1, 1, 1));
+        assert_eq!(parsed.classify("src/crypto/key.rs"), RiskLevel::HighRisk);
+    }
+
+    /// Lists that are missing from the TOML default to empty.
+    #[test]
+    fn trust_config_partial_toml() {
+        // Only `high_risk` is given; the other two lists are omitted.
+        let parsed: TrustConfig = toml::from_str(
+            r#"
+            high_risk = ["*.lock"]
+        "#,
+        )
+        .unwrap();
+        let expectations = [
+            ("Cargo.lock", RiskLevel::HighRisk),
+            ("Cargo.toml", RiskLevel::Standard),
+        ];
+        for (path, level) in expectations {
+            assert_eq!(parsed.classify(path), level);
+        }
+    }
+
+    /// The assessment lists one risk entry per input file, in input order.
+    #[test]
+    fn per_file_risks_in_assessment() {
+        let trust = test_config();
+        let changed = vec![
+            "src/crypto/aes.rs".into(),
+            "src/main.rs".into(),
+            "docs/api.md".into(),
+        ];
+        let outcome = trust.assess(&changed);
+
+        let expected = [
+            RiskLevel::HighRisk,
+            RiskLevel::Standard,
+            RiskLevel::AutoOk,
+        ];
+        assert_eq!(outcome.file_risks.len(), 3);
+        for (entry, level) in outcome.file_risks.iter().zip(expected) {
+            assert_eq!(entry.1, level);
+        }
+    }
+
+    // ── Pipeline-relevant integration tests ──
+
+    /// Only a high-risk file forces human review; auto-ok, standard and
+    /// security-sensitive changesets stay eligible for auto-approval.
+    #[test]
+    fn high_risk_blocks_auto_approval() {
+        let trust = test_config();
+
+        // Changesets that must remain eligible for auto-approval.
+        let auto_ok_only = trust.assess(&["docs/guide.md".into(), "README.md".into()]);
+        assert!(
+            !auto_ok_only.requires_human_review,
+            "auto-ok files should not block"
+        );
+        let standard_only = trust.assess(&["src/main.rs".into()]);
+        assert!(
+            !standard_only.requires_human_review,
+            "standard files should not block"
+        );
+        let sensitive = trust.assess(&["Cargo.toml".into()]);
+        assert!(
+            !sensitive.requires_human_review,
+            "security-sensitive should not block"
+        );
+        // ...although security-sensitive files still request the extra checks.
+        assert!(
+            sensitive.triggers_security_checks,
+            "should trigger security checks"
+        );
+
+        // A single high-risk file blocks auto-approval.
+        let high_risk = trust.assess(&["src/crypto/key.rs".into()]);
+        assert!(
+            high_risk.requires_human_review,
+            "high-risk must block auto-approval"
+        );
+    }
+
+    /// Security checks run for security-sensitive and high-risk files, and
+    /// for nothing below that.
+    #[test]
+    fn security_checks_triggered_for_sensitive_and_high_risk() {
+        let trust = test_config();
+
+        // (single changed file, whether security checks are expected)
+        let cases = [
+            ("docs/readme.md", false),          // auto-ok
+            ("src/lib.rs", false),              // standard
+            (".github/workflows/ci.yml", true), // security-sensitive
+            ("Cargo.lock", true),               // high-risk (superset of security-sensitive)
+        ];
+        for (path, expected) in cases {
+            let outcome = trust.assess(&[path.into()]);
+            assert_eq!(outcome.triggers_security_checks, expected);
+        }
+    }
+
+    /// The overall risk of a changeset is the highest risk among its files.
+    #[test]
+    fn mixed_changeset_overall_risk_is_maximum() {
+        let trust = test_config();
+
+        // standard + auto-ok: standard wins.
+        let low_mix = trust.assess(&["src/main.rs".into(), "docs/readme.md".into()]);
+        assert_eq!(low_mix.overall_risk, RiskLevel::Standard);
+
+        // security-sensitive + standard: security-sensitive wins.
+        let mid_mix = trust.assess(&["Cargo.toml".into(), "src/main.rs".into()]);
+        assert_eq!(mid_mix.overall_risk, RiskLevel::SecuritySensitive);
+
+        // One file of every level: high-risk wins and sets both flags.
+        let everything = trust.assess(&[
+            "docs/readme.md".into(), // auto-ok
+            "src/main.rs".into(),    // standard
+            "Cargo.toml".into(),     // security-sensitive
+            "Cargo.lock".into(),     // high-risk
+        ]);
+        assert_eq!(everything.overall_risk, RiskLevel::HighRisk);
+        assert!(everything.triggers_security_checks);
+        assert!(everything.requires_human_review);
+    }
+
+    /// A `[trust]` table inside a full repo config is parsed and usable.
+    #[test]
+    fn trust_config_from_full_repo_toml() {
+        let repo: crate::repo::RepoConfig = toml::from_str(
+            r#"
+            name = "test-repo"
+            path = "/tmp/test"
+            build_cmd = "cargo build"
+            test_cmd = "cargo test"
+            lint_cmd = "cargo clippy"
+            fmt_cmd = "cargo fmt --check"
+
+            [trust]
+            high_risk = ["src/gate.rs", "src/budget.rs"]
+            security_sensitive = ["src/api.rs"]
+            auto_ok = ["assets/*"]
+        "#,
+        )
+        .unwrap();
+        let trust = repo.trust.expect("trust config should be present");
+
+        let expectations = [
+            ("src/gate.rs", RiskLevel::HighRisk),
+            ("src/api.rs", RiskLevel::SecuritySensitive),
+            ("assets/logo.png", RiskLevel::AutoOk),
+            ("src/main.rs", RiskLevel::Standard),
+        ];
+        for (path, level) in expectations {
+            assert_eq!(trust.classify(path), level);
+        }
+    }
+
+    /// A `TrustAssessment` survives a JSON round trip with its fields intact.
+    #[test]
+    fn trust_assessment_serialization_roundtrip() {
+        let trust = test_config();
+        let original = trust.assess(&["src/crypto/aes.rs".into(), "docs/api.md".into()]);
+
+        // Encode to JSON, then decode into a fresh value.
+        let encoded = serde_json::to_string(&original).unwrap();
+        let restored: TrustAssessment = serde_json::from_str(&encoded).unwrap();
+
+        assert_eq!(restored.file_risks.len(), original.file_risks.len());
+        assert_eq!(restored.overall_risk, original.overall_risk);
+        assert_eq!(
+            restored.triggers_security_checks,
+            original.triggers_security_checks
+        );
+        assert_eq!(
+            restored.requires_human_review,
+            original.requires_human_review
+        );
+    }
+}
diff --git a/crates/thrum-core/src/verification.rs b/crates/thrum-core/src/verification.rs
new file mode 100644
index 0000000..8a11a66
--- /dev/null
+++ b/crates/thrum-core/src/verification.rs
@@ -0,0 +1,984 @@
+//! Verification-tagged acceptance criteria for harness-first engineering.
+//!
+//! Each acceptance criterion gets a verification tag specifying HOW it will be
+//! verified: (TEST), (LINT), (BENCH), (MANUAL), (BROWSER), (SECURITY).
+//!
+//! This creates traceability from requirement → verification method → result.
+//! "Hope someone reads the code" is not acceptable.
+
+use serde::{Deserialize, Serialize};
+use std::fmt;
+
+/// How strictly the pre-dispatch audit validates criteria.
+///
+/// In `Strict` mode, any untagged or vague criterion causes the audit to fail.
+/// In `Lenient` mode the same problems are still recorded as feedback, but
+/// the audit passes. The audit does not add tags itself; untagged criteria
+/// are expected to have been enriched by the caller (see `enrich_criteria`).
+///
+/// `Strict` is the `Default` value.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum AuditLevel {
+    /// Reject tasks with untagged or vague criteria.
+    #[default]
+    Strict,
+    /// Warn but allow tasks through (issues are reported as feedback only).
+    Lenient,
+}
+
+/// How an acceptance criterion will be verified.
+///
+/// Inspired by harness-first engineering (Shoemaker): if it matters,
+/// there must be a concrete, automated verification mechanism.
+///
+/// In criteria text each variant appears as a parenthesized, upper-case
+/// suffix such as `(TEST)`; see `as_tag_str` for the canonical forms.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum VerificationTag {
+    /// Verified by automated tests (unit, integration, property-based).
+    Test,
+    /// Verified by linting / static analysis (clippy, eslint, etc.).
+    Lint,
+    /// Verified by benchmarks / performance tests.
+    Bench,
+    /// Requires manual human verification.
+    Manual,
+    /// Verified by browser / UI testing.
+    Browser,
+    /// Verified by security audit / scanning.
+    Security,
+}
+
+impl VerificationTag {
+    /// Look up a tag by its bare name, ignoring case (e.g. `"test"` or `"TEST"`).
+    ///
+    /// The name is given without the surrounding parentheses. Returns `None`
+    /// when it does not name a known tag.
+    pub fn from_str_tag(s: &str) -> Option<Self> {
+        let lookup = [
+            ("TEST", Self::Test),
+            ("LINT", Self::Lint),
+            ("BENCH", Self::Bench),
+            ("MANUAL", Self::Manual),
+            ("BROWSER", Self::Browser),
+            ("SECURITY", Self::Security),
+        ];
+        let wanted = s.to_uppercase();
+        lookup
+            .into_iter()
+            .find(|(name, _)| wanted == *name)
+            .map(|(_, tag)| tag)
+    }
+
+    /// The canonical, parenthesized form used in criteria text, e.g. "(TEST)".
+    pub fn as_tag_str(&self) -> &'static str {
+        match *self {
+            VerificationTag::Test => "(TEST)",
+            VerificationTag::Lint => "(LINT)",
+            VerificationTag::Bench => "(BENCH)",
+            VerificationTag::Manual => "(MANUAL)",
+            VerificationTag::Browser => "(BROWSER)",
+            VerificationTag::Security => "(SECURITY)",
+        }
+    }
+
+    /// Every verification tag, in declaration order.
+    pub fn all() -> &'static [VerificationTag] {
+        const ALL: &[VerificationTag] = &[
+            VerificationTag::Test,
+            VerificationTag::Lint,
+            VerificationTag::Bench,
+            VerificationTag::Manual,
+            VerificationTag::Browser,
+            VerificationTag::Security,
+        ];
+        ALL
+    }
+
+    /// Gate check names associated with this tag.
+    ///
+    /// `map_gate_results` uses these to attach gate results to tagged
+    /// criteria. Note that "cargo_deny" is listed for both `Lint` and
+    /// `Security`.
+    pub fn matching_check_names(&self) -> &'static [&'static str] {
+        const TEST: &[&str] = &["cargo_test", "test", "integration_test", "cargo_mutants"];
+        const LINT: &[&str] = &[
+            "cargo_clippy",
+            "cargo_fmt",
+            "clippy",
+            "fmt",
+            "lint",
+            "cargo_deny",
+        ];
+        const BENCH: &[&str] = &["bench", "benchmark", "perf"];
+        const MANUAL: &[&str] = &["manual", "review"];
+        const BROWSER: &[&str] = &["browser", "e2e", "playwright", "cypress"];
+        const SECURITY: &[&str] =
+            &["security", "audit", "cargo_audit", "cargo_deny", "advisory"];
+
+        match *self {
+            VerificationTag::Test => TEST,
+            VerificationTag::Lint => LINT,
+            VerificationTag::Bench => BENCH,
+            VerificationTag::Manual => MANUAL,
+            VerificationTag::Browser => BROWSER,
+            VerificationTag::Security => SECURITY,
+        }
+    }
+}
+
+/// Displays the tag in its canonical parenthesized form, e.g. "(TEST)".
+impl fmt::Display for VerificationTag {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_tag_str())
+    }
+}
+
+/// An acceptance criterion with a verification tag and tracked results.
+///
+/// Produced by `parse_tagged_criterion` with an empty `verifications` list;
+/// `map_gate_results` returns copies with gate outcomes appended.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaggedCriterion {
+    /// The human-readable criterion text (without the tag suffix).
+    pub description: String,
+    /// How this criterion will be verified.
+    pub tag: VerificationTag,
+    /// Verification results (populated as gates run).
+    /// Defaults to an empty list when the field is absent on deserialization.
+    #[serde(default)]
+    pub verifications: Vec<CriterionVerification>,
+}
+
+impl TaggedCriterion {
+    /// Render the criterion as text: the description, one space, then the
+    /// tag, e.g. "Tests pass (TEST)".
+    pub fn to_tagged_string(&self) -> String {
+        [self.description.as_str(), self.tag.as_tag_str()].join(" ")
+    }
+
+    /// True when at least one recorded verification passed.
+    pub fn is_verified(&self) -> bool {
+        self.verifications.iter().any(|run| run.passed)
+    }
+
+    /// True when verifications were recorded and none of them passed.
+    pub fn is_failed(&self) -> bool {
+        match self.verifications.as_slice() {
+            [] => false,
+            runs => runs.iter().all(|run| !run.passed),
+        }
+    }
+
+    /// Display label: "verified", "failed", or "pending" (nothing recorded yet).
+    pub fn status_label(&self) -> &'static str {
+        match (self.is_verified(), self.is_failed()) {
+            (true, _) => "verified",
+            (false, true) => "failed",
+            (false, false) => "pending",
+        }
+    }
+}
+
+/// A single verification result for a criterion.
+///
+/// One entry is recorded per gate check that matched the criterion's tag.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CriterionVerification {
+    /// Which gate check produced this result (e.g. "cargo_test").
+    pub check_name: String,
+    /// Whether the verification passed.
+    pub passed: bool,
+    /// When the verification ran (UTC). `map_gate_results` stamps every
+    /// entry created in one call with the same instant.
+    pub timestamp: chrono::DateTime<chrono::Utc>,
+}
+
+// ─── Verification report ─────────────────────────────────────────────────
+
+/// Structured verification report aggregating per-criterion results.
+///
+/// Generated after gates run, this provides a single snapshot of which
+/// acceptance criteria were verified, which failed, and which remain pending.
+/// Used by the dashboard and audit trail.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VerificationReport {
+    /// Task ID this report is for.
+    pub task_id: i64,
+    /// Per-criterion verification entries.
+    pub entries: Vec<VerificationReportEntry>,
+    /// Number of criteria with at least one passing verification.
+    pub verified_count: usize,
+    /// Number of criteria that were checked but have no passing verification.
+    pub failed_count: usize,
+    /// Number of criteria with no verification results yet.
+    pub pending_count: usize,
+    /// Total number of criteria in the report.
+    pub total_count: usize,
+}
+
+/// A single entry in a verification report.
+///
+/// A flattened snapshot of one `TaggedCriterion` at report time.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VerificationReportEntry {
+    /// The criterion description.
+    pub description: String,
+    /// The verification tag.
+    pub tag: VerificationTag,
+    /// Current status: "verified", "failed", or "pending".
+    pub status: String,
+    /// Names of the checks recorded against this criterion, passing or not.
+    pub check_names: Vec<String>,
+}
+
+impl VerificationReport {
+    /// Snapshot the given criteria into a report for `task_id`.
+    ///
+    /// Each criterion becomes one entry carrying its status label and the
+    /// names of all checks recorded against it; the counts come from
+    /// `verification_summary`.
+    pub fn from_criteria(task_id: i64, criteria: &[TaggedCriterion]) -> Self {
+        let mut entries = Vec::with_capacity(criteria.len());
+        for criterion in criteria {
+            let check_names = criterion
+                .verifications
+                .iter()
+                .map(|run| run.check_name.clone())
+                .collect();
+            entries.push(VerificationReportEntry {
+                description: criterion.description.clone(),
+                tag: criterion.tag,
+                status: criterion.status_label().to_string(),
+                check_names,
+            });
+        }
+
+        let (verified_count, failed_count, pending_count, total_count) =
+            verification_summary(criteria);
+
+        Self {
+            task_id,
+            entries,
+            verified_count,
+            failed_count,
+            pending_count,
+            total_count,
+        }
+    }
+
+    /// True when the report is non-empty and every criterion is verified.
+    pub fn all_verified(&self) -> bool {
+        self.total_count > 0 && self.verified_count == self.total_count
+    }
+
+    /// True when at least one criterion failed.
+    pub fn has_failures(&self) -> bool {
+        self.failed_count != 0
+    }
+}
+
+/// Renders a summary line followed by one indented line per criterion:
+/// a status icon, the description, the tag, and (when any were recorded)
+/// the contributing check names in square brackets.
+impl fmt::Display for VerificationReport {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(
+            f,
+            "Verification Report (TASK-{:04}): {}/{} verified, {} failed, {} pending",
+            self.task_id,
+            self.verified_count,
+            self.total_count,
+            self.failed_count,
+            self.pending_count
+        )?;
+
+        for entry in &self.entries {
+            let icon = if entry.status == "verified" {
+                "✓"
+            } else if entry.status == "failed" {
+                "✗"
+            } else {
+                "○"
+            };
+            // Bracketed check list, or nothing when no checks were recorded.
+            let checks = if entry.check_names.is_empty() {
+                String::new()
+            } else {
+                format!(" [{}]", entry.check_names.join(", "))
+            };
+            write!(f, "  {icon} {} {}", entry.description, entry.tag)?;
+            writeln!(f, "{checks}")?;
+        }
+
+        Ok(())
+    }
+}
+
+// ─── Parsing ────────────────────────────────────────────────────────────
+
+/// Parse a criterion that ends in a parenthesized tag, e.g. "Tests pass (TEST)".
+///
+/// Surrounding whitespace is ignored. The text between the last `(` and the
+/// closing `)` must name a valid tag (case-insensitive); everything before
+/// that `(` becomes the trimmed description. The result starts with no
+/// verifications.
+///
+/// Returns `None` if the string does not end in a valid tag.
+pub fn parse_tagged_criterion(s: &str) -> Option<TaggedCriterion> {
+    // Drop the closing parenthesis, then split at the last opening one.
+    let without_close = s.trim().strip_suffix(')')?;
+    let (text, tag_name) = without_close.rsplit_once('(')?;
+    let tag = VerificationTag::from_str_tag(tag_name)?;
+
+    Some(TaggedCriterion {
+        description: text.trim().to_string(),
+        tag,
+        verifications: Vec::new(),
+    })
+}
+
+/// Split criteria into those that carry a valid tag and those that do not.
+///
+/// Returns `(tagged, untagged)`: the parsed criteria, and the original
+/// strings that could not be parsed. Each list keeps the input order.
+pub fn parse_all_criteria(criteria: &[String]) -> (Vec<TaggedCriterion>, Vec<String>) {
+    criteria.iter().fold(
+        (Vec::new(), Vec::new()),
+        |(mut tagged, mut untagged), raw| {
+            if let Some(parsed) = parse_tagged_criterion(raw) {
+                tagged.push(parsed);
+            } else {
+                untagged.push(raw.clone());
+            }
+            (tagged, untagged)
+        },
+    )
+}
+
+// ─── Pre-dispatch audit ─────────────────────────────────────────────────
+
+/// Result of auditing a task's acceptance criteria before dispatch.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AuditResult {
+    /// Whether the audit passed. In `Strict` mode this means no untagged or
+    /// vague criteria were found; in `Lenient` mode, and for an empty
+    /// criteria list, it is always `true`.
+    pub passed: bool,
+    /// Feedback messages for the user/planner. May be non-empty even when
+    /// `passed` is `true`.
+    pub feedback: Vec<String>,
+    /// Successfully parsed tagged criteria (untagged inputs are not included).
+    pub tagged_criteria: Vec<TaggedCriterion>,
+}
+
+/// Patterns that indicate a vague, non-measurable criterion.
+///
+/// If a criterion description (lowercased) contains any of these,
+/// the audit flags it as vague and asks for a concrete, measurable version.
+/// Matching is by plain substring, so entries must be lowercase and also
+/// match inside longer words (e.g. "improve" flags "improved").
+const VAGUE_PATTERNS: &[&str] = &[
+    "make it better",
+    "improve",
+    "fix stuff",
+    "clean up",
+    "looks good",
+    "should work",
+    "make it fast",
+    "make it nice",
+    "do it right",
+    "handle edge cases",
+    "be robust",
+    "work properly",
+    "good enough",
+    "as expected",
+];
+
+/// Minimum description length, in Unicode characters (not bytes), for a
+/// criterion to be considered concrete. Very short criteria like "fast" or
+/// "works" are likely vague.
+const MIN_CRITERION_LENGTH: usize = 10;
+
+/// Check if a criterion description is vague or non-measurable.
+///
+/// A description is vague when, after trimming surrounding whitespace, it
+/// has fewer than `MIN_CRITERION_LENGTH` characters, or when its lowercased
+/// form contains any entry of `VAGUE_PATTERNS`.
+pub fn is_vague_criterion(description: &str) -> bool {
+    // Count characters rather than bytes: `str::len` reports UTF-8 bytes,
+    // which would let a short non-ASCII description pass the length check.
+    if description.trim().chars().count() < MIN_CRITERION_LENGTH {
+        return true;
+    }
+
+    // Contains known vague patterns
+    let lower = description.to_lowercase();
+    VAGUE_PATTERNS.iter().any(|pattern| lower.contains(pattern))
+}
+
+/// Audit acceptance criteria before a task moves from Pending to Implementing.
+///
+/// Uses `AuditLevel::Strict` by default. See [`audit_criteria_with_level`]
+/// for configurable strictness.
+///
+/// Validates that:
+/// 1. Every criterion has a verification tag.
+/// 2. No criterion is vague (e.g. "make it better").
+///
+/// An empty criteria list passes, with a single informational feedback
+/// message.
+///
+/// Returns an `AuditResult` with feedback if the audit fails.
+pub fn audit_criteria(criteria: &[String]) -> AuditResult {
+    // Thin convenience wrapper: always audits at the strictest level.
+    audit_criteria_with_level(criteria, AuditLevel::Strict)
+}
+
+/// Audit acceptance criteria with configurable strictness.
+///
+/// Feedback is collected for every untagged criterion and for every tagged
+/// criterion whose description is vague (untagged ones first). In `Strict`
+/// mode any feedback fails the audit; in `Lenient` mode the feedback is
+/// still returned but the audit passes.
+///
+/// This function does not add tags: untagged criteria are left out of
+/// `tagged_criteria`, so callers wanting them enriched should do so
+/// beforehand. An empty criteria list passes at either level.
+pub fn audit_criteria_with_level(criteria: &[String], level: AuditLevel) -> AuditResult {
+    if criteria.is_empty() {
+        return AuditResult {
+            passed: true,
+            feedback: vec![
+                "No acceptance criteria defined — task will proceed without criteria.".into(),
+            ],
+            tagged_criteria: Vec::new(),
+        };
+    }
+
+    let (tagged_criteria, untagged) = parse_all_criteria(criteria);
+
+    // One message per criterion that has no verification tag.
+    let untagged_notes = untagged.iter().map(|criterion| {
+        format!(
+            "Untagged criterion: \"{criterion}\". Add a verification tag like (TEST), (LINT), (BENCH), (MANUAL), (BROWSER), or (SECURITY)."
+        )
+    });
+
+    // One message per tagged criterion that is too vague to verify.
+    let vague_notes = tagged_criteria
+        .iter()
+        .filter(|tc| is_vague_criterion(&tc.description))
+        .map(|tc| {
+            format!(
+                "Vague criterion: \"{}\". Make it concrete and measurable.",
+                tc.description
+            )
+        });
+
+    let feedback: Vec<String> = untagged_notes.chain(vague_notes).collect();
+
+    let passed = match level {
+        // Every untagged or vague criterion produced feedback above, so an
+        // empty list means there is nothing to reject.
+        AuditLevel::Strict => feedback.is_empty(),
+        // Issues are reported but never block the task.
+        AuditLevel::Lenient => true,
+    };
+
+    AuditResult {
+        passed,
+        feedback,
+        tagged_criteria,
+    }
+}
+
+// ─── Gate result mapping ────────────────────────────────────────────────
+
+/// Attach gate check results to the tagged criteria they verify.
+///
+/// A check applies to a criterion when its lowercased name contains, as a
+/// substring, any of the names from the tag's `matching_check_names`. Each
+/// applicable check is appended to the criterion's `verifications` with its
+/// pass/fail outcome; all entries from one call share the same timestamp.
+///
+/// The input is not modified; updated copies are returned in input order.
+pub fn map_gate_results(
+    criteria: &[TaggedCriterion],
+    checks: &[crate::task::CheckResult],
+) -> Vec<TaggedCriterion> {
+    let now = chrono::Utc::now();
+
+    criteria
+        .iter()
+        .cloned()
+        .map(|mut criterion| {
+            let accepted = criterion.tag.matching_check_names();
+
+            let hits = checks
+                .iter()
+                .filter(|check| {
+                    let lowered = check.name.to_lowercase();
+                    accepted.iter().any(|name| lowered.contains(name))
+                })
+                .map(|check| CriterionVerification {
+                    check_name: check.name.clone(),
+                    passed: check.passed,
+                    timestamp: now,
+                });
+            criterion.verifications.extend(hits);
+
+            criterion
+        })
+        .collect()
+}
+
+/// Count criteria by verification status.
+///
+/// Returns (verified_count, failed_count, pending_count, total), where
+/// pending is whatever is neither verified nor failed.
+pub fn verification_summary(criteria: &[TaggedCriterion]) -> (usize, usize, usize, usize) {
+    let mut verified = 0;
+    let mut failed = 0;
+    // A criterion is never both verified and failed, so one pass suffices.
+    for criterion in criteria {
+        if criterion.is_verified() {
+            verified += 1;
+        } else if criterion.is_failed() {
+            failed += 1;
+        }
+    }
+
+    let total = criteria.len();
+    (verified, failed, total - verified - failed, total)
+}
+
+// ─── Planner enrichment ─────────────────────────────────────────────────
+
+/// Suggest a verification tag for an untagged criterion based on keywords.
+///
+/// This is a best-effort heuristic — the planner agent should do the real
+/// enrichment using LLM intelligence.
+///
+/// The lowercased text is checked against keyword groups in this order:
+/// lint, bench, browser, security, manual; the first group with a hit wins
+/// and `Test` is the fallback. Keywords match as substrings, except "ui",
+/// which must appear as a whole word: as a substring it also occurs in
+/// "build", "suite", "require" and "guide", which used to send ordinary
+/// test criteria to `Browser`.
+pub fn suggest_tag(criterion: &str) -> VerificationTag {
+    /// True when `text` contains any of `keywords` as a substring.
+    fn mentions(text: &str, keywords: &[&str]) -> bool {
+        keywords.iter().any(|keyword| text.contains(*keyword))
+    }
+
+    /// True when `word` occurs in `text` as a whole alphanumeric token.
+    fn has_word(text: &str, word: &str) -> bool {
+        text.split(|c: char| !c.is_alphanumeric())
+            .any(|token| token == word)
+    }
+
+    let lower = criterion.to_lowercase();
+
+    if mentions(&lower, &["clippy", "lint", "fmt", "format", "warning"]) {
+        VerificationTag::Lint
+    } else if mentions(
+        &lower,
+        &["bench", "latency", "throughput", "p99", "p95", "perf"],
+    ) {
+        VerificationTag::Bench
+    } else if has_word(&lower, "ui")
+        || mentions(
+            &lower,
+            &["browser", "render", "display", "dashboard", "visible"],
+        )
+    {
+        VerificationTag::Browser
+    } else if mentions(
+        &lower,
+        &["security", "auth", "cve", "vulnerability", "xss", "injection"],
+    ) {
+        VerificationTag::Security
+    } else if mentions(&lower, &["manual", "review", "inspect", "human"]) {
+        VerificationTag::Manual
+    } else {
+        // Default: most criteria are verifiable by tests
+        VerificationTag::Test
+    }
+}
+
+/// Append a suggested verification tag to every criterion that lacks one.
+///
+/// Criteria that already end in a valid tag are returned unchanged. Others
+/// are trimmed and get a space plus the tag chosen by `suggest_tag`.
+/// The output has the same length and order as the input.
+pub fn enrich_criteria(criteria: &[String]) -> Vec<String> {
+    let mut enriched = Vec::with_capacity(criteria.len());
+    for raw in criteria {
+        let text = match parse_tagged_criterion(raw) {
+            // Already tagged: keep verbatim.
+            Some(_) => raw.clone(),
+            // Untagged: append the heuristic suggestion.
+            None => format!("{} {}", raw.trim(), suggest_tag(raw).as_tag_str()),
+        };
+        enriched.push(text);
+    }
+    enriched
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A `(TEST)` suffix is split off and the result has no verifications yet.
+    #[test]
+    fn parse_test_tag() {
+        let parsed = parse_tagged_criterion("All tests pass (TEST)").unwrap();
+        assert_eq!(parsed.tag, VerificationTag::Test);
+        assert_eq!(parsed.description, "All tests pass");
+        assert!(parsed.verifications.is_empty());
+    }
+
+    /// A `(LINT)` suffix is recognised and removed from the description.
+    #[test]
+    fn parse_lint_tag() {
+        let parsed = parse_tagged_criterion("No clippy warnings (LINT)").unwrap();
+        assert_eq!(parsed.tag, VerificationTag::Lint);
+        assert_eq!(parsed.description, "No clippy warnings");
+    }
+
+    /// A `(BENCH)` suffix is recognised even when the text contains slashes.
+    #[test]
+    fn parse_bench_tag() {
+        let input = "P99 latency below 50ms on /api/tasks (BENCH)";
+        let parsed = parse_tagged_criterion(input).unwrap();
+        assert_eq!(parsed.tag, VerificationTag::Bench);
+        assert_eq!(parsed.description, "P99 latency below 50ms on /api/tasks");
+    }
+
+    /// Every tag's canonical string parses back to the same tag.
+    #[test]
+    fn parse_all_tags() {
+        for &expected in VerificationTag::all() {
+            let text = format!("Some criterion {}", expected.as_tag_str());
+            let parsed = parse_tagged_criterion(&text).unwrap();
+            assert_eq!(parsed.tag, expected);
+        }
+    }
+
+    /// Text without a tag, or with an unknown parenthesized suffix, is rejected.
+    #[test]
+    fn parse_no_tag_returns_none() {
+        for input in ["Just some text", "Has parens (but invalid)"] {
+            assert!(parse_tagged_criterion(input).is_none());
+        }
+    }
+
+    /// Tag names are accepted in lower and mixed case.
+    #[test]
+    fn parse_case_insensitive() {
+        let cases = [
+            ("Tests pass (test)", VerificationTag::Test),
+            ("Lint clean (Lint)", VerificationTag::Lint),
+        ];
+        for (input, expected) in cases {
+            let parsed = parse_tagged_criterion(input).unwrap();
+            assert_eq!(parsed.tag, expected);
+        }
+    }
+
+    /// Tagged and untagged criteria are separated into the two result lists.
+    #[test]
+    fn parse_all_criteria_mixed() {
+        let input = vec![
+            "Tests pass (TEST)".into(),
+            "Untagged criterion".into(),
+            "No warnings (LINT)".into(),
+        ];
+        let (with_tag, without_tag) = parse_all_criteria(&input);
+        assert_eq!(with_tag.len(), 2);
+        assert_eq!(without_tag.len(), 1);
+        assert_eq!(without_tag[0], "Untagged criterion");
+    }
+
+    /// A list of tagged, concrete criteria passes the strict audit.
+    #[test]
+    fn audit_all_tagged_passes() {
+        let input = vec!["Tests pass (TEST)".into(), "No warnings (LINT)".into()];
+        let outcome = audit_criteria(&input);
+        assert_eq!(outcome.tagged_criteria.len(), 2);
+        assert!(outcome.passed);
+    }
+
+    /// One untagged criterion fails the strict audit and produces feedback.
+    #[test]
+    fn audit_untagged_fails() {
+        let input = vec!["Tests pass (TEST)".into(), "Some untagged thing".into()];
+        let outcome = audit_criteria(&input);
+        assert!(!outcome.feedback.is_empty());
+        assert!(!outcome.passed);
+    }
+
+    /// A tagged but vague criterion fails the strict audit with a "Vague" note.
+    #[test]
+    fn audit_vague_fails() {
+        let input = vec!["Make it better (TEST)".into()];
+        let outcome = audit_criteria(&input);
+        assert!(!outcome.passed);
+        let first_note = &outcome.feedback[0];
+        assert!(first_note.contains("Vague"));
+    }
+
+    /// An empty criteria list passes the audit.
+    #[test]
+    fn audit_empty_passes() {
+        let outcome = audit_criteria(&[]);
+        assert!(outcome.passed);
+    }
+
+    /// Each keyword group maps to its tag, and plain text falls back to `Test`.
+    #[test]
+    fn suggest_tag_keywords() {
+        let cases = [
+            ("No clippy warnings", VerificationTag::Lint),
+            ("P99 latency below 50ms", VerificationTag::Bench),
+            ("Dashboard shows status", VerificationTag::Browser),
+            ("No XSS vulnerabilities", VerificationTag::Security),
+            ("Manual review of docs", VerificationTag::Manual),
+            ("All unit tests pass", VerificationTag::Test),
+        ];
+        for (text, expected) in cases {
+            assert_eq!(suggest_tag(text), expected);
+        }
+    }
+
+    /// Tagged criteria are kept verbatim; untagged ones gain a suggested tag.
+    #[test]
+    fn enrich_adds_tags() {
+        let input = vec![
+            "Tests pass (TEST)".into(),
+            "No clippy warnings".into(),
+            "P99 latency below 50ms".into(),
+        ];
+        let output = enrich_criteria(&input);
+        assert_eq!(output[0], "Tests pass (TEST)");
+        for (index, suffix) in [(1, "(LINT)"), (2, "(BENCH)")] {
+            assert!(output[index].ends_with(suffix));
+        }
+    }
+
+    /// Each gate check is attached only to the criterion whose tag it matches.
+    #[test]
+    fn map_gate_results_links_checks() {
+        let criterion = |description: &str, tag| TaggedCriterion {
+            description: description.into(),
+            tag,
+            verifications: Vec::new(),
+        };
+        let criteria = vec![
+            criterion("Tests pass", VerificationTag::Test),
+            criterion("No warnings", VerificationTag::Lint),
+        ];
+
+        let checks = vec![
+            crate::task::CheckResult::simple("cargo_test", true, "", "", 0),
+            crate::task::CheckResult::simple("cargo_clippy", false, "", "warning found", 1),
+        ];
+
+        let updated = map_gate_results(&criteria, &checks);
+
+        // The test criterion picks up the passing cargo_test run only.
+        let test_runs = &updated[0].verifications;
+        assert_eq!(test_runs.len(), 1);
+        assert!(test_runs[0].passed);
+        assert_eq!(test_runs[0].check_name, "cargo_test");
+
+        // The lint criterion picks up the failing cargo_clippy run only.
+        let lint_runs = &updated[1].verifications;
+        assert_eq!(lint_runs.len(), 1);
+        assert!(!lint_runs[0].passed);
+        assert_eq!(lint_runs[0].check_name, "cargo_clippy");
+    }
+
+    /// One verified, one failed and one pending criterion are counted separately.
+    #[test]
+    fn verification_summary_counts() {
+        let run = |check: &str, passed| CriterionVerification {
+            check_name: check.into(),
+            passed,
+            timestamp: chrono::Utc::now(),
+        };
+        let criteria = vec![
+            TaggedCriterion {
+                description: "Tests pass".into(),
+                tag: VerificationTag::Test,
+                verifications: vec![run("cargo_test", true)],
+            },
+            TaggedCriterion {
+                description: "No warnings".into(),
+                tag: VerificationTag::Lint,
+                verifications: vec![run("cargo_clippy", false)],
+            },
+            TaggedCriterion {
+                description: "Perf ok".into(),
+                tag: VerificationTag::Bench,
+                verifications: Vec::new(),
+            },
+        ];
+
+        let counts = verification_summary(&criteria);
+        // (verified, failed, pending, total)
+        assert_eq!(counts, (1, 1, 1, 3));
+    }
+
+    /// The label moves from "pending" to "failed" to "verified" as results arrive.
+    #[test]
+    fn tagged_criterion_status_labels() {
+        let run = |passed| CriterionVerification {
+            check_name: "test".into(),
+            passed,
+            timestamp: chrono::Utc::now(),
+        };
+        let mut criterion = TaggedCriterion {
+            description: "Test".into(),
+            tag: VerificationTag::Test,
+            verifications: Vec::new(),
+        };
+
+        // Nothing recorded yet.
+        assert_eq!(criterion.status_label(), "pending");
+
+        // Only a failing run recorded.
+        criterion.verifications.push(run(false));
+        assert_eq!(criterion.status_label(), "failed");
+
+        // A later passing run outweighs the earlier failure.
+        criterion.verifications.push(run(true));
+        assert_eq!(criterion.status_label(), "verified");
+    }
+
+    /// `Display` renders each tag in its parenthesized upper-case form.
+    #[test]
+    fn verification_tag_display() {
+        let expected = [
+            (VerificationTag::Test, "(TEST)"),
+            (VerificationTag::Lint, "(LINT)"),
+            (VerificationTag::Bench, "(BENCH)"),
+            (VerificationTag::Manual, "(MANUAL)"),
+            (VerificationTag::Browser, "(BROWSER)"),
+            (VerificationTag::Security, "(SECURITY)"),
+        ];
+        for (tag, text) in expected {
+            assert_eq!(format!("{}", tag), text);
+        }
+    }
+
+    #[test]
+    fn tagged_criterion_to_string() {
+        let criterion = TaggedCriterion {
+            verifications: vec![],
+            tag: VerificationTag::Test,
+            description: String::from("All tests pass"),
+        };
+        assert_eq!(criterion.to_tagged_string(), "All tests pass (TEST)");
+    }
+
+    // ─── VerificationReport tests ───────────────────────────────────────
+
+    #[test]
+    fn verification_report_from_criteria() {
+        let checked = TaggedCriterion {
+            description: String::from("Tests pass"),
+            tag: VerificationTag::Test,
+            verifications: vec![CriterionVerification {
+                check_name: "cargo_test".into(),
+                passed: true,
+                timestamp: chrono::Utc::now(),
+            }],
+        };
+        let unchecked = TaggedCriterion {
+            description: String::from("No warnings"),
+            tag: VerificationTag::Lint,
+            verifications: vec![],
+        };
+        // One criterion with a passing check plus one that was never checked.
+        let criteria = vec![checked, unchecked];
+
+        let summary = VerificationReport::from_criteria(42, &criteria);
+        assert_eq!(summary.task_id, 42);
+        assert_eq!(summary.total_count, 2);
+        assert_eq!(summary.verified_count, 1);
+        assert_eq!(summary.pending_count, 1);
+        assert_eq!(summary.failed_count, 0);
+        assert!(!summary.all_verified());
+        assert!(!summary.has_failures());
+    }
+
+    #[test]
+    fn verification_report_all_verified() {
+        let passing_run = CriterionVerification {
+            check_name: "cargo_test".into(),
+            passed: true,
+            timestamp: chrono::Utc::now(),
+        };
+        let criteria = vec![TaggedCriterion {
+            description: String::from("Tests pass"),
+            tag: VerificationTag::Test,
+            verifications: vec![passing_run],
+        }];
+        let summary = VerificationReport::from_criteria(1, &criteria);
+        assert!(summary.all_verified());
+    }
+
+    #[test]
+    fn verification_report_display() {
+        let passing_run = CriterionVerification {
+            check_name: "cargo_test".into(),
+            passed: true,
+            timestamp: chrono::Utc::now(),
+        };
+        let criteria = vec![TaggedCriterion {
+            description: String::from("Tests pass"),
+            tag: VerificationTag::Test,
+            verifications: vec![passing_run],
+        }];
+        let summary = VerificationReport::from_criteria(42, &criteria);
+        let rendered = summary.to_string();
+        assert!(rendered.contains("TASK-0042"));
+        assert!(rendered.contains("1/1 verified"));
+        assert!(rendered.contains("Tests pass"));
+    }
+
+    // ─── AuditLevel tests ───────────────────────────────────────────────
+
+    #[test]
+    fn audit_lenient_passes_with_untagged() {
+        let untagged = vec!["Some untagged criterion that is long enough".into()];
+        let outcome = audit_criteria_with_level(&untagged, AuditLevel::Lenient);
+        assert!(outcome.passed); // lenient mode does not fail the audit...
+        assert!(!outcome.feedback.is_empty()); // ...but feedback is still produced
+    }
+
+    #[test]
+    fn audit_strict_rejects_untagged() {
+        let untagged = vec!["Some untagged criterion that is long enough".into()];
+        let outcome = audit_criteria_with_level(&untagged, AuditLevel::Strict);
+        assert!(!outcome.passed); // same input as the lenient test, opposite verdict
+    }
+
+    // ─── Vague detection tests ──────────────────────────────────────────
+
+    #[test]
+    fn vague_detection_short_criterion() {
+        for terse in ["fast", "works", "ok"] {
+            assert!(is_vague_criterion(terse));
+        }
+    }
+
+    #[test]
+    fn vague_detection_known_patterns() {
+        assert!(is_vague_criterion("Make it fast and responsive"));
+        assert!(is_vague_criterion("Handle edge cases for the feature"));
+        assert!(is_vague_criterion("Should work properly in all cases"));
+        assert!(is_vague_criterion("Make it better somehow please"));
+    }
+
+    #[test]
+    fn vague_detection_concrete_is_not_vague() {
+        assert!(!is_vague_criterion("All unit tests pass without failures"));
+        assert!(!is_vague_criterion("No clippy warnings in crate"));
+        assert!(!is_vague_criterion("P99 latency below 50ms on /api/tasks"));
+    }
+
+    #[test]
+    fn audit_rejects_very_short_criteria() {
+        let terse = vec!["fast (BENCH)".into()];
+        let outcome = audit_criteria(&terse);
+        assert!(!outcome.passed); // carrying a tag does not rescue a one-word criterion
+        assert!(outcome.feedback[0].contains("Vague"));
+    }
+
+    #[test]
+    fn audit_rejects_new_vague_patterns() {
+        let wording = vec!["Handle edge cases properly (TEST)".into()];
+        let outcome = audit_criteria(&wording);
+        assert!(!outcome.passed); // tagged and long enough, yet still rejected
+    }
+}
+
+#[cfg(test)]
+mod proptests {
+    use super::*;
+    use proptest::prelude::*;
+
+    /// Strategy that yields one of six `VerificationTag` variants.
+    fn arb_tag() -> impl Strategy<Value = VerificationTag> {
+        prop_oneof![
+            Just(VerificationTag::Test),
+            Just(VerificationTag::Lint),
+            Just(VerificationTag::Bench),
+            Just(VerificationTag::Manual),
+            Just(VerificationTag::Browser),
+            Just(VerificationTag::Security),
+        ]
+    }
+
+    proptest! {
+        /// A tag's text, minus its surrounding parentheses, parses back to the same tag.
+        #[test]
+        fn tag_roundtrip(tag in arb_tag()) {
+            let rendered = tag.as_tag_str();
+            // Drop the first and last byte: "(TEST)" → "TEST"
+            let bare = &rendered[1..rendered.len() - 1];
+            let reparsed = VerificationTag::from_str_tag(bare).unwrap();
+            prop_assert_eq!(tag, reparsed);
+        }
+
+        /// Rendering a criterion and parsing the text recovers its description and tag.
+        #[test]
+        fn tagged_criterion_roundtrip(
+            desc in "[A-Za-z0-9 ]{10,50}",
+            tag in arb_tag(),
+        ) {
+            let original = TaggedCriterion {
+                description: desc.clone(),
+                tag,
+                verifications: vec![],
+            };
+            let rendered = original.to_tagged_string();
+            let reparsed = parse_tagged_criterion(&rendered).unwrap();
+            prop_assert_eq!(reparsed.description.trim(), desc.trim());
+            prop_assert_eq!(reparsed.tag, tag);
+        }
+
+        /// Whatever `enrich_criteria` returns for an untagged input can be parsed.
+        #[test]
+        fn enriched_always_parses(desc in "[A-Za-z0-9 ]{5,50}") {
+            let untagged = vec![desc];
+            let enriched = enrich_criteria(&untagged);
+            for c in &enriched {
+                prop_assert!(parse_tagged_criterion(c).is_some(),
+                    "enriched criterion failed to parse: {c}");
+            }
+        }
+
+        /// A tagged criterion with a long, pattern-free description passes the audit.
+        #[test]
+        fn audit_tagged_concrete_passes(
+            desc in "[A-Z][a-z]{15,40} passes correctly",
+            tag in arb_tag(),
+        ) {
+            let tagged = format!("{desc} {}", tag.as_tag_str());
+            let outcome = audit_criteria(&[tagged]);
+            // NOTE(review): assumes random lowercase runs never spell a vague pattern — confirm.
+            prop_assert!(outcome.passed, "audit failed for: {desc}");
+        }
+
+        /// `map_gate_results` returns exactly one entry per input criterion.
+        #[test]
+        fn map_preserves_count(count in 1usize..10) {
+            let inputs: Vec<TaggedCriterion> = (0..count)
+                .map(|i| TaggedCriterion {
+                    description: format!("Criterion {i}"),
+                    tag: VerificationTag::Test,
+                    verifications: vec![],
+                })
+                .collect();
+            let gate = vec![crate::task::CheckResult::simple("cargo_test", true, "", "", 0)];
+            let outputs = map_gate_results(&inputs, &gate);
+            prop_assert_eq!(outputs.len(), count);
+        }
+    }
+}
diff --git a/crates/thrum-db/Cargo.toml b/crates/thrum-db/Cargo.toml
index 69585e0..9403113 100644
--- a/crates/thrum-db/Cargo.toml
+++ b/crates/thrum-db/Cargo.toml
@@ -18,6 +18,7 @@ tracing = { workspace = true }
 tempfile = "3"
 loom = { workspace = true }
 criterion = { workspace = true }
+toml = { workspace = true }
 
 [[bench]]
 name = "task_store"
diff --git a/crates/thrum-db/src/checkpoint_store.rs b/crates/thrum-db/src/checkpoint_store.rs
index f726e43..e8d108a 100644
--- a/crates/thrum-db/src/checkpoint_store.rs
+++ b/crates/thrum-db/src/checkpoint_store.rs
@@ -89,13 +89,7 @@ mod tests {
     fn sample_gate_report(level: GateLevel) -> GateReport {
         GateReport {
             level,
-            checks: vec![CheckResult {
-                name: "cargo_test".into(),
-                passed: true,
-                stdout: "ok".into(),
-                stderr: String::new(),
-                exit_code: 0,
-            }],
+            checks: vec![CheckResult::simple("cargo_test", true, "ok", "", 0)],
             passed: true,
             duration_secs: 5.0,
         }
diff --git a/crates/thrum-db/src/gate_store.rs b/crates/thrum-db/src/gate_store.rs
index 4d67edf..6cc09b0 100644
--- a/crates/thrum-db/src/gate_store.rs
+++ b/crates/thrum-db/src/gate_store.rs
@@ -85,13 +85,7 @@ mod tests {
 
         let report = GateReport {
             level: GateLevel::Quality,
-            checks: vec![CheckResult {
-                name: "cargo_test".into(),
-                passed: true,
-                stdout: "ok".into(),
-                stderr: String::new(),
-                exit_code: 0,
-            }],
+            checks: vec![CheckResult::simple("cargo_test", true, "ok", "", 0)],
             passed: true,
             duration_secs: 12.5,
         };
diff --git a/crates/thrum-db/src/memory_store.rs b/crates/thrum-db/src/memory_store.rs
index 7f32cae..3a0e237 100644
--- a/crates/thrum-db/src/memory_store.rs
+++ b/crates/thrum-db/src/memory_store.rs
@@ -123,6 +123,38 @@ impl<'a> MemoryStore<'a> {
         Ok(entries)
     }
 
+    /// Return the error-category memories recorded for one task, most relevant first.
+    ///
+    /// Counterpart to `query_errors_for_repo` that is scoped to `task_id`, so error
+    /// context left behind by unrelated tasks is never mixed into the result. At most
+    /// `limit` entries are returned, ordered by descending `relevance_score`.
+    pub fn query_errors_for_task(
+        &self,
+        task_id: &thrum_core::task::TaskId,
+        limit: usize,
+    ) -> Result<Vec<MemoryEntry>> {
+        let txn = self.db.begin_read()?;
+        let memories = txn.open_table(MEMORY_TABLE)?;
+
+        let mut matches = Vec::new();
+        for row in memories.iter()? {
+            let (_, raw) = row?;
+            let memory: MemoryEntry = serde_json::from_str(raw.value())?;
+            if memory.task_id == *task_id && memory.category.is_error() {
+                matches.push(memory);
+            }
+        }
+
+        // Scores that cannot be ordered (`partial_cmp` yields `None`) are treated as equal.
+        matches.sort_by(|lhs, rhs| {
+            rhs.relevance_score
+                .partial_cmp(&lhs.relevance_score)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        });
+        matches.truncate(limit);
+        Ok(matches)
+    }
+
     /// Touch a list of memory entries (bump access_count and last_accessed).
     ///
     /// Called after querying memories for prompt injection so that frequently
diff --git a/crates/thrum-db/src/task_store.rs b/crates/thrum-db/src/task_store.rs
index 67d8a60..ad1f9c0 100644
--- a/crates/thrum-db/src/task_store.rs
+++ b/crates/thrum-db/src/task_store.rs
@@ -1,6 +1,7 @@
 use anyhow::{Context, Result};
 use chrono::Utc;
 use redb::{Database, ReadableTable, TableDefinition};
+use std::collections::HashSet;
 use thrum_core::task::{RepoName, Task, TaskId, TaskStatus};
 
 /// Priority category for claiming the next task.
@@ -221,6 +222,87 @@ impl<'a> TaskStore<'a> {
         Ok(existed)
     }
 
+    /// Collect the IDs of every stored task whose status is terminal.
+    ///
+    /// Feeds dependency resolution: callers treat a task's prerequisites as
+    /// satisfied once each of them is present in the returned set.
+    pub fn completed_task_ids(&self) -> Result<HashSet<i64>> {
+        let txn = self.db.begin_read()?;
+        let table = txn.open_table(TASKS_TABLE)?;
+
+        let mut done = HashSet::new();
+        for row in table.iter()? {
+            let (id, raw) = row?;
+            let task: Task = serde_json::from_str(raw.value())?;
+            // NOTE(review): confirm non-merged terminal statuses should also unblock dependents.
+            if task.status.is_terminal() {
+                done.insert(id.value());
+            }
+        }
+
+        Ok(done)
+    }
+
+    /// Atomically claim the first eligible task whose dependencies are satisfied.
+    ///
+    /// Like [`claim_next`], but a candidate must also pass `dependencies_satisfied(completed)`.
+    /// Returns the task in its new `Claimed` state, or `None` when no task qualifies.
+    pub fn claim_next_with_deps(
+        &self,
+        agent_id: &str,
+        category: ClaimCategory,
+        repo_filter: Option<&RepoName>,
+        completed: &HashSet<i64>,
+    ) -> Result<Option<Task>> {
+        let write_txn = self.db.begin_write()?;
+        let result = {
+            let mut tasks = write_txn.open_table(TASKS_TABLE)?;
+            // Phase 1: scan the table and stop at the first task that passes every filter.
+            let mut candidate: Option<Task> = None;
+            {
+                let iter = tasks.iter()?;
+                for entry in iter {
+                    let (_, value) = entry?;
+                    let task: Task = serde_json::from_str(value.value())?;
+
+                    if let Some(repo) = repo_filter
+                        && &task.repo != repo
+                    {
+                        continue;
+                    }
+
+                    let eligible = match category {
+                        ClaimCategory::RetryableFailed => {
+                            task.status.is_claimable_retry() && task.can_retry()
+                        }
+                        ClaimCategory::Approved => task.status.is_claimable_approved(),
+                        ClaimCategory::Pending => task.status.is_claimable_pending(),
+                    };
+                    // `completed` is the caller's snapshot; it is not re-read in this transaction.
+                    if eligible && task.dependencies_satisfied(completed) {
+                        candidate = Some(task);
+                        break;
+                    }
+                }
+            }
+            // Phase 2: mark the candidate as claimed and write it back under the same id.
+            if let Some(mut task) = candidate {
+                task.status = TaskStatus::Claimed {
+                    agent_id: agent_id.to_string(),
+                    claimed_at: Utc::now(),
+                };
+                task.updated_at = Utc::now();
+                let json = serde_json::to_string(&task)?;
+                tasks.insert(task.id.0, json.as_str())?;
+                Some(task)
+            } else {
+                None
+            }
+        };
+        write_txn.commit()?;
+        Ok(result)
+    }
+
     /// Count tasks by status.
     pub fn status_counts(&self) -> Result<std::collections::HashMap<String, usize>> {
         let read_txn = self.db.begin_read()?;
@@ -251,13 +333,7 @@ mod tests {
     fn failing_gate(level: GateLevel) -> GateReport {
         GateReport {
             level,
-            checks: vec![CheckResult {
-                name: "test".into(),
-                passed: false,
-                stdout: String::new(),
-                stderr: "fail".into(),
-                exit_code: 1,
-            }],
+            checks: vec![CheckResult::simple("test", false, "", "fail", 1)],
             passed: false,
             duration_secs: 0.5,
         }
@@ -475,4 +551,136 @@ mod tests {
         assert_eq!(claimed.title, "Failing");
         assert_eq!(claimed.status.label(), "claimed");
     }
+
+    #[test]
+    fn completed_task_ids() {
+        let db = test_db();
+        let store = TaskStore::new(&db);
+
+        let mut merged = store
+            .insert(Task::new(
+                RepoName::new("loom"),
+                "Task A".into(),
+                "d".into(),
+            ))
+            .unwrap();
+        merged.status = TaskStatus::Merged {
+            commit_sha: "abc123".into(),
+        };
+        store.update(&merged).unwrap();
+        // Task A is now merged; Task B below stays in its freshly inserted status.
+        store
+            .insert(Task::new(
+                RepoName::new("loom"),
+                "Task B".into(),
+                "d".into(),
+            ))
+            .unwrap();
+
+        let done = store.completed_task_ids().unwrap();
+        assert!(done.contains(&1));
+        assert!(!done.contains(&2));
+    }
+
+    #[test]
+    fn claim_next_with_deps_respects_dependencies() {
+        use thrum_core::dependency::TaskDependency;
+
+        let db = test_db();
+        let store = TaskStore::new(&db);
+
+        // Task A (id 1) has no prerequisites, so it can be claimed straight away.
+        store
+            .insert(Task::new(
+                RepoName::new("loom"),
+                "Task A".into(),
+                "d".into(),
+            ))
+            .unwrap();
+
+        // Task B (id 2) hard-depends on Task A and must wait for it.
+        let mut dependent = Task::new(RepoName::new("loom"), "Task B".into(), "d".into());
+        dependent.depends_on = vec![TaskDependency::hard(TaskId(1))];
+        store.insert(dependent).unwrap();
+
+        // Nothing has completed yet: the only claimable task is A.
+        let none_done = HashSet::new();
+        let first = store
+            .claim_next_with_deps("agent-1", ClaimCategory::Pending, None, &none_done)
+            .unwrap()
+            .unwrap();
+        assert_eq!(first.title, "Task A");
+
+        // A is already claimed and B's prerequisite is unmet, so nothing is handed out.
+        let blocked = store
+            .claim_next_with_deps("agent-2", ClaimCategory::Pending, None, &none_done)
+            .unwrap();
+        assert!(blocked.is_none());
+
+        // The completed set is supplied by the caller: once it lists id 1,
+        // Task B becomes claimable.
+        let a_done = HashSet::from([1i64]);
+
+        let unblocked = store
+            .claim_next_with_deps("agent-2", ClaimCategory::Pending, None, &a_done)
+            .unwrap()
+            .unwrap();
+        assert_eq!(unblocked.title, "Task B");
+    }
+
+    /// A task carrying a full `Spec` gets it back unchanged after insert and fetch.
+    #[test]
+    fn task_with_spec_roundtrip() {
+        use thrum_core::spec::{DesignSpec, Priority, ProofObligation, Spec, SpecRequirement};
+
+        let db = test_db();
+        let store = TaskStore::new(&db);
+
+        let requirement = SpecRequirement {
+            id: "REQ-001".into(),
+            description: "Feature X must work".into(),
+            rationale: "Customer request".into(),
+            priority: Priority::P1,
+            safety_relevance: None,
+        };
+        let spec = Spec {
+            title: "Add feature X".into(),
+            context: "Business requirement".into(),
+            requirements: vec![requirement],
+            design: DesignSpec {
+                approach: "Modify module Y".into(),
+                affected_files: vec!["src/y.rs".into()],
+                interfaces: vec!["fn do_x() -> Result<()>".into()],
+                constraints: vec!["Must be backward compatible".into()],
+            },
+            acceptance_criteria: vec![
+                "Feature X works (TEST)".into(),
+                "Dashboard shows X (BROWSER)".into(),
+            ],
+            proof_obligations: vec![ProofObligation {
+                property: "X is safe".into(),
+                prover: "Z3".into(),
+                proof_file: Some("proofs/x.z3".into()),
+            }],
+            test_plan: vec!["Manual edge case testing".into()],
+        };
+
+        let mut draft = Task::new(RepoName::new("loom"), "Add X".into(), "desc".into());
+        draft.spec = Some(spec);
+        draft.requirement_id = Some("REQ-001".into());
+
+        let inserted = store.insert(draft).unwrap();
+        let loaded = store.get(&inserted.id).unwrap().unwrap();
+        // Every section of the spec, and the requirement link, must come back as written.
+        assert!(loaded.spec.is_some());
+        let loaded_spec = loaded.spec.unwrap();
+        assert_eq!(loaded_spec.title, "Add feature X");
+        assert_eq!(loaded_spec.requirements.len(), 1);
+        assert_eq!(loaded_spec.requirements[0].id, "REQ-001");
+        assert_eq!(loaded_spec.design.affected_files, vec!["src/y.rs"]);
+        assert_eq!(loaded_spec.proof_obligations.len(), 1);
+        assert_eq!(loaded_spec.proof_obligations[0].prover, "Z3");
+        assert_eq!(loaded_spec.acceptance_criteria.len(), 2);
+        assert_eq!(loaded.requirement_id, Some("REQ-001".into()));
+    }
 }
diff --git a/crates/thrum-db/src/trace_store.rs b/crates/thrum-db/src/trace_store.rs
index e9797fc..42129ec 100644
--- a/crates/thrum-db/src/trace_store.rs
+++ b/crates/thrum-db/src/trace_store.rs
@@ -86,4 +86,114 @@ impl<'a> TraceStore<'a> {
             None => Ok(None),
         }
     }
+
+    /// Return every stored trace record that matches the given filters.
+    ///
+    /// A filter left as `None` matches every record; when both `task_id` and
+    /// `requirement_id` are supplied a record must match both. Records come back
+    /// in the order the table iterator yields them.
+    pub fn list_all(
+        &self,
+        task_id: Option<i64>,
+        requirement_id: Option<&str>,
+    ) -> Result<Vec<TraceRecord>> {
+        let txn = self.db.begin_read()?;
+        let table = txn.open_table(TRACES_TABLE)?;
+
+        let mut matching = Vec::new();
+        for row in table.iter()? {
+            let (_, raw) = row?;
+            let record: TraceRecord = serde_json::from_str(raw.value())?;
+
+            // Only a filter that is present can exclude a record.
+            let wrong_task = task_id.is_some_and(|tid| record.task_id != tid);
+            let wrong_req = requirement_id.is_some_and(|rid| record.requirement_id != rid);
+            if wrong_task || wrong_req {
+                continue;
+            }
+            matching.push(record);
+        }
+
+        Ok(matching)
+    }
+
+    /// Expose the `Database` reference held by this store.
+    pub fn db(&self) -> &Database {
+        self.db
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use thrum_core::traceability::TraceArtifact;
+
+    /// Open a database in a new temp dir; the dir guard is returned alongside it.
+    fn test_db() -> (Database, tempfile::TempDir) {
+        let dir = tempfile::tempdir().unwrap();
+        let db = crate::open_db(&dir.path().join("test.redb")).unwrap();
+        (db, dir)
+    }
+
+    #[test]
+    fn insert_and_get_trace_record() {
+        let (db, _dir) = test_db();
+        let store = TraceStore::new(&db);
+
+        let draft = TraceRecord {
+            id: 0, // placeholder: the record returned by `insert` carries id 1
+            task_id: 42,
+            requirement_id: "REQ-001".into(),
+            artifact: TraceArtifact::Requirement {
+                title: "Test".into(),
+                description: "Desc".into(),
+            },
+            created_at: chrono::Utc::now(),
+        };
+
+        let stored = store.insert(draft).unwrap();
+        assert_eq!(stored.id, 1);
+
+        let loaded = store.get(1).unwrap().unwrap();
+        assert_eq!(loaded.task_id, 42);
+        assert_eq!(loaded.requirement_id, "REQ-001");
+    }
+
+    #[test]
+    fn list_all_with_filters() {
+        let (db, _dir) = test_db();
+        let store = TraceStore::new(&db);
+
+        // Three records: task 1 covers two requirements, task 2 shares REQ-001.
+        for (tid, rid) in [(1, "REQ-001"), (1, "REQ-002"), (2, "REQ-001")] {
+            store
+                .insert(TraceRecord {
+                    id: 0,
+                    task_id: tid,
+                    requirement_id: rid.into(),
+                    artifact: TraceArtifact::Requirement {
+                        title: "T".into(),
+                        description: "D".into(),
+                    },
+                    created_at: chrono::Utc::now(),
+                })
+                .unwrap();
+        }
+
+        // Unfiltered: everything comes back.
+        let everything = store.list_all(None, None).unwrap();
+        assert_eq!(everything.len(), 3);
+
+        // Task filter only: both records of task 1.
+        let for_task_1 = store.list_all(Some(1), None).unwrap();
+        assert_eq!(for_task_1.len(), 2);
+
+        // Requirement filter only: REQ-001 appears under both tasks.
+        let for_req_001 = store.list_all(None, Some("REQ-001")).unwrap();
+        assert_eq!(for_req_001.len(), 2);
+
+        // Both filters: a single record satisfies the pair.
+        let intersection = store.list_all(Some(1), Some("REQ-001")).unwrap();
+        assert_eq!(intersection.len(), 1);
+    }
 }
diff --git a/crates/thrum-db/tests/lifecycle.rs b/crates/thrum-db/tests/lifecycle.rs
index 21b9363..04f91ab 100644
--- a/crates/thrum-db/tests/lifecycle.rs
+++ b/crates/thrum-db/tests/lifecycle.rs
@@ -15,13 +15,7 @@ fn test_db() -> redb::Database {
 fn passing_gate(level: GateLevel) -> GateReport {
     GateReport {
         level,
-        checks: vec![CheckResult {
-            name: "test_check".into(),
-            passed: true,
-            stdout: "all good".into(),
-            stderr: String::new(),
-            exit_code: 0,
-        }],
+        checks: vec![CheckResult::simple("test_check", true, "all good", "", 0)],
         passed: true,
         duration_secs: 1.0,
     }
@@ -31,13 +25,13 @@ fn passing_gate(level: GateLevel) -> GateReport {
 fn failing_gate(level: GateLevel) -> GateReport {
     GateReport {
         level,
-        checks: vec![CheckResult {
-            name: "test_check".into(),
-            passed: false,
-            stdout: String::new(),
-            stderr: "assertion failed".into(),
-            exit_code: 1,
-        }],
+        checks: vec![CheckResult::simple(
+            "test_check",
+            false,
+            "",
+            "assertion failed",
+            1,
+        )],
         passed: false,
         duration_secs: 0.5,
     }
@@ -96,6 +90,7 @@ fn happy_path_lifecycle() {
         reviewer_output: "LGTM".into(),
         gate1_report: gate1,
         gate2_report: Some(gate2),
+        trust_assessment: None,
     };
     task.status = TaskStatus::AwaitingApproval { summary };
     task.updated_at = chrono::Utc::now();
@@ -231,6 +226,7 @@ fn rejection_path() {
         reviewer_output: "needs work".into(),
         gate1_report: passing_gate(GateLevel::Quality),
         gate2_report: None,
+        trust_assessment: None,
     };
     task.status = TaskStatus::AwaitingApproval { summary };
     tasks.update(&task).unwrap();
@@ -355,6 +351,326 @@ fn claimed_status_lifecycle() {
     assert_eq!(fetched.status.label(), "implementing");
 }
 
+/// CI path: Approved → Integrating → AwaitingCI (push + PR) → Merged.
+///
+/// Exercises the CI-enabled flow where a task transitions through
+/// the full pipeline including the AwaitingCI state that tracks
+/// a pushed branch and created PR.
+#[test]
+fn ci_path_lifecycle() {
+    let db = test_db();
+    let tasks = TaskStore::new(&db);
+    let gates = GateStore::new(&db);
+
+    // Create and fast-forward to Approved
+    let mut task = tasks
+        .insert(Task::new(
+            RepoName::new("loom"),
+            "Add WASM SIMD support".into(),
+            "Implement SIMD instructions for the WASM backend".into(),
+        ))
+        .unwrap();
+
+    task.status = TaskStatus::Approved;
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // Step 1: Integrating (Gate 3 runs)
+    task.status = TaskStatus::Integrating;
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+    assert_eq!(task.status.label(), "integrating");
+
+    let gate3 = passing_gate(GateLevel::Integration);
+    gates.store(&task.id, &gate3).unwrap();
+
+    // Step 2: Push branch + create PR → AwaitingCI
+    let branch = task.branch_name();
+    let pr_number = 42u64;
+    let pr_url = "https://github.com/org/loom/pull/42".to_string();
+
+    task.status = TaskStatus::AwaitingCI {
+        pr_number,
+        pr_url: pr_url.clone(),
+        branch: branch.clone(),
+        started_at: chrono::Utc::now(),
+        ci_attempts: 0,
+    };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // Verify AwaitingCI properties
+    assert_eq!(task.status.label(), "awaiting-ci");
+    assert!(task.status.is_awaiting_ci());
+    assert!(!task.status.is_terminal());
+    assert!(!task.status.needs_human());
+
+    // Verify the PR metadata is stored and retrievable
+    let fetched = tasks.get(&task.id).unwrap().unwrap();
+    match &fetched.status {
+        TaskStatus::AwaitingCI {
+            pr_number: pn,
+            pr_url: pu,
+            branch: br,
+            ci_attempts: ca,
+            ..
+        } => {
+            assert_eq!(*pn, 42);
+            assert_eq!(pu, "https://github.com/org/loom/pull/42");
+            assert_eq!(br, &branch);
+            assert_eq!(*ca, 0);
+        }
+        other => panic!("expected AwaitingCI, got {}", other.label()),
+    }
+
+    // Verify it shows up in status counts
+    let counts = tasks.status_counts().unwrap();
+    assert_eq!(counts.get("awaiting-ci"), Some(&1));
+
+    // Verify it shows up when listing by status
+    let ci_tasks = tasks.list(Some("awaiting-ci"), None).unwrap();
+    assert_eq!(ci_tasks.len(), 1);
+    assert_eq!(ci_tasks[0].id, task.id);
+
+    // Step 3: CI passes → Merged
+    task.status = TaskStatus::Merged {
+        commit_sha: "deadbeef123456".into(),
+    };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+    assert!(task.status.is_terminal());
+}
+
+/// CI failure path: AwaitingCI → CIFailed after max retries.
+///
+/// Exercises the CI failure escalation path where the ci_fixer agent
+/// exhausts its retries and the task escalates to human review.
+#[test]
+fn ci_failure_escalation() {
+    let db = test_db();
+    let tasks = TaskStore::new(&db);
+
+    let mut task = tasks
+        .insert(Task::new(
+            RepoName::new("synth"),
+            "Fix ARM NEON codegen".into(),
+            "NEON intrinsics emit wrong opcodes".into(),
+        ))
+        .unwrap();
+
+    // Fast-forward to AwaitingCI
+    let branch = task.branch_name();
+    task.status = TaskStatus::AwaitingCI {
+        pr_number: 99,
+        pr_url: "https://github.com/org/synth/pull/99".into(),
+        branch: branch.clone(),
+        started_at: chrono::Utc::now(),
+        ci_attempts: 0,
+    };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // Simulate ci_fixer retry: increment attempts and stay in AwaitingCI
+    task.status = TaskStatus::AwaitingCI {
+        pr_number: 99,
+        pr_url: "https://github.com/org/synth/pull/99".into(),
+        branch: branch.clone(),
+        started_at: chrono::Utc::now(),
+        ci_attempts: 1,
+    };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // Verify ci_attempts incremented
+    let fetched = tasks.get(&task.id).unwrap().unwrap();
+    match &fetched.status {
+        TaskStatus::AwaitingCI { ci_attempts, .. } => {
+            assert_eq!(*ci_attempts, 1);
+        }
+        other => panic!("expected AwaitingCI, got {}", other.label()),
+    }
+
+    // Escalate to CIFailed after max retries
+    task.status = TaskStatus::CIFailed {
+        pr_number: 99,
+        pr_url: "https://github.com/org/synth/pull/99".into(),
+        failure_summary: "test_neon_simd failed: wrong opcode for vaddq_f32".into(),
+        ci_attempts: 4,
+    };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // CIFailed needs human review
+    assert!(task.status.needs_human());
+    assert!(!task.status.is_terminal());
+    assert_eq!(task.status.label(), "ci-failed");
+
+    // Verify PR metadata preserved in CIFailed
+    let fetched = tasks.get(&task.id).unwrap().unwrap();
+    match &fetched.status {
+        TaskStatus::CIFailed {
+            pr_number,
+            pr_url,
+            failure_summary,
+            ci_attempts,
+        } => {
+            assert_eq!(*pr_number, 99);
+            assert_eq!(pr_url, "https://github.com/org/synth/pull/99");
+            assert!(failure_summary.contains("wrong opcode"));
+            assert_eq!(*ci_attempts, 4);
+        }
+        other => panic!("expected CIFailed, got {}", other.label()),
+    }
+
+    // Verify status counts
+    let counts = tasks.status_counts().unwrap();
+    assert_eq!(counts.get("ci-failed"), Some(&1));
+}
+
+/// CI integration is opt-in: when no [ci] section is present,
+/// the repo config has ci = None and CI stays off. Once a [ci] section
+/// exists, `enabled` defaults to true unless it is explicitly set to false.
+#[test]
+fn ci_config_opt_in() {
+    use std::path::PathBuf;
+    use thrum_core::repo::{CIConfig, RepoConfig};
+
+    // Default repo config: no CI section -> ci is None
+    let config = RepoConfig {
+        name: RepoName::new("my-project"),
+        path: PathBuf::from("/tmp/test"),
+        build_cmd: "cargo build".into(),
+        test_cmd: "cargo test".into(),
+        lint_cmd: "cargo clippy".into(),
+        fmt_cmd: "cargo fmt --check".into(),
+        verify_cmd: None,
+        proofs_cmd: None,
+        claude_md: None,
+        safety_target: None,
+        ci: None,
+        checks: thrum_core::repo::default_checks(),
+        mutants: None,
+        trust: None,
+    };
+
+    // When ci is None, CI is disabled (opt-in)
+    let ci_enabled = config.ci.as_ref().is_some_and(|ci| ci.enabled);
+    assert!(
+        !ci_enabled,
+        "CI should be disabled when no [ci] section is present"
+    );
+
+    // When ci section is present with defaults, CI is enabled
+    let config_with_ci = RepoConfig {
+        ci: Some(CIConfig::default()),
+        ..config.clone()
+    };
+    let ci_enabled = config_with_ci.ci.as_ref().is_some_and(|ci| ci.enabled);
+    assert!(
+        ci_enabled,
+        "CI should be enabled when [ci] section is present with defaults"
+    );
+
+    // When ci section is present but disabled, CI is off
+    let config_disabled = RepoConfig {
+        ci: Some(CIConfig {
+            enabled: false,
+            ..CIConfig::default()
+        }),
+        ..config
+    };
+    let ci_disabled = config_disabled.ci.as_ref().is_some_and(|ci| ci.enabled);
+    assert!(!ci_disabled, "CI should be disabled when enabled = false");
+}
+
+/// CI config parses from TOML with [repo.ci] section.
+#[test]
+fn ci_config_toml_parsing() {
+    use thrum_core::repo::ReposConfig;
+
+    let toml_str = r#"
+[[repo]]
+name = "my-project"
+path = "/tmp/test"
+build_cmd = "cargo build"
+test_cmd = "cargo test"
+lint_cmd = "cargo clippy"
+fmt_cmd = "cargo fmt --check"
+
+[repo.ci]
+enabled = true
+poll_interval_secs = 30
+max_ci_retries = 5
+auto_merge = false
+merge_strategy = "rebase"
+"#;
+
+    let config: ReposConfig = toml::from_str(toml_str).unwrap();
+    let repo = &config.repo[0];
+
+    let ci = repo.ci.as_ref().expect("CI config should be present");
+    assert!(ci.enabled);
+    assert_eq!(ci.poll_interval_secs, 30);
+    assert_eq!(ci.max_ci_retries, 5);
+    assert!(!ci.auto_merge);
+    assert_eq!(ci.merge_strategy, "rebase");
+}
+
+/// CI config defaults work when [repo.ci] section has no fields.
+#[test]
+fn ci_config_defaults_from_toml() {
+    use thrum_core::repo::ReposConfig;
+
+    let toml_str = r#"
+[[repo]]
+name = "my-project"
+path = "/tmp/test"
+build_cmd = "cargo build"
+test_cmd = "cargo test"
+lint_cmd = "cargo clippy"
+fmt_cmd = "cargo fmt --check"
+
+[repo.ci]
+"#;
+
+    let config: ReposConfig = toml::from_str(toml_str).unwrap();
+    let repo = &config.repo[0];
+
+    let ci = repo.ci.as_ref().expect("CI config should be present");
+    assert!(ci.enabled);
+    assert_eq!(ci.poll_interval_secs, 60);
+    assert_eq!(ci.max_ci_retries, 3);
+    assert!(ci.auto_merge);
+    assert_eq!(ci.merge_strategy, "squash");
+}
+
+/// CI disabled by default: repos without [ci] section skip CI.
+#[test]
+fn ci_disabled_by_default_in_toml() {
+    use thrum_core::repo::ReposConfig;
+
+    let toml_str = r#"
+[[repo]]
+name = "my-project"
+path = "/tmp/test"
+build_cmd = "cargo build"
+test_cmd = "cargo test"
+lint_cmd = "cargo clippy"
+fmt_cmd = "cargo fmt --check"
+"#;
+
+    let config: ReposConfig = toml::from_str(toml_str).unwrap();
+    let repo = &config.repo[0];
+
+    // No [repo.ci] section → ci is None → CI disabled
+    assert!(
+        repo.ci.is_none(),
+        "CI config should be None when not specified"
+    );
+    let ci_enabled = repo.ci.as_ref().is_some_and(|ci| ci.enabled);
+    assert!(!ci_enabled, "CI should be disabled when no [ci] section");
+}
+
 /// Spec-based task preserves spec through serialization roundtrip.
 #[test]
 fn spec_roundtrip() {
@@ -376,3 +692,95 @@ fn spec_roundtrip() {
     assert!(fetched.spec.is_some());
     assert_eq!(fetched.spec.unwrap().title, "Add popcnt");
 }
+
+/// Trust assessment roundtrips through the database when stored in CheckpointSummary.
+#[test]
+fn trust_assessment_roundtrip() {
+    use thrum_core::trust::{RiskLevel, TrustConfig};
+
+    let db = test_db();
+    let tasks = TaskStore::new(&db);
+
+    let mut task = tasks
+        .insert(Task::new(
+            RepoName::new("loom"),
+            "Touch crypto files".into(),
+            "Changes to key derivation".into(),
+        ))
+        .unwrap();
+
+    // Build a trust assessment
+    let config = TrustConfig {
+        high_risk: vec!["src/crypto/**".into()],
+        security_sensitive: vec!["Cargo.toml".into()],
+        auto_ok: vec!["docs/**".into()],
+    };
+    let assessment = config.assess(&[
+        "src/crypto/aes.rs".into(),
+        "src/main.rs".into(),
+        "docs/readme.md".into(),
+    ]);
+
+    assert_eq!(assessment.overall_risk, RiskLevel::HighRisk);
+    assert!(assessment.requires_human_review);
+    assert!(assessment.triggers_security_checks);
+
+    // Store in CheckpointSummary
+    let summary = CheckpointSummary {
+        diff_summary: "3 files changed".into(),
+        reviewer_output: "LGTM".into(),
+        gate1_report: passing_gate(GateLevel::Quality),
+        gate2_report: None,
+        trust_assessment: Some(assessment.clone()),
+    };
+    task.status = TaskStatus::AwaitingApproval { summary };
+    task.updated_at = chrono::Utc::now();
+    tasks.update(&task).unwrap();
+
+    // Fetch and verify roundtrip
+    let fetched = tasks.get(&task.id).unwrap().unwrap();
+    if let TaskStatus::AwaitingApproval { ref summary } = fetched.status {
+        let ta = summary
+            .trust_assessment
+            .as_ref()
+            .expect("trust assessment should survive roundtrip");
+        assert_eq!(ta.overall_risk, RiskLevel::HighRisk);
+        assert!(ta.requires_human_review);
+        assert!(ta.triggers_security_checks);
+        assert_eq!(ta.file_risks.len(), 3);
+        assert_eq!(ta.file_risks[0].1, RiskLevel::HighRisk);
+        assert_eq!(ta.file_risks[1].1, RiskLevel::Standard);
+        assert_eq!(ta.file_risks[2].1, RiskLevel::AutoOk);
+    } else {
+        panic!("expected AwaitingApproval status");
+    }
+}
+
+/// High-risk trust assessment cannot be auto-approved (verifies the data model
+/// that the dashboard and CLI use to block approval).
+#[test]
+fn trust_assessment_blocks_approval_data_model() {
+    use thrum_core::trust::{RiskLevel, TrustConfig};
+
+    let config = TrustConfig {
+        high_risk: vec!["src/gate.rs".into(), "src/budget.rs".into()],
+        security_sensitive: vec!["src/api.rs".into()],
+        auto_ok: vec!["assets/*".into()],
+    };
+
+    // Case 1: Only auto-ok files → approval is fine
+    let safe = config.assess(&["assets/logo.png".into()]);
+    assert!(!safe.requires_human_review);
+    assert_eq!(safe.overall_risk, RiskLevel::AutoOk);
+
+    // Case 2: High-risk file → approval must be blocked
+    let risky = config.assess(&["src/gate.rs".into(), "assets/logo.png".into()]);
+    assert!(risky.requires_human_review);
+    assert_eq!(risky.overall_risk, RiskLevel::HighRisk);
+
+    // Case 3: Security-sensitive → extra checks but approval OK
+    let sensitive = config.assess(&["src/api.rs".into()]);
+    assert!(!sensitive.requires_human_review);
+    assert!(sensitive.triggers_security_checks);
+    assert_eq!(sensitive.overall_risk, RiskLevel::SecuritySensitive);
+}
diff --git a/crates/thrum-db/tests/parallel_claim.rs b/crates/thrum-db/tests/parallel_claim.rs
index 78a8140..711193b 100644
--- a/crates/thrum-db/tests/parallel_claim.rs
+++ b/crates/thrum-db/tests/parallel_claim.rs
@@ -115,13 +115,7 @@ fn claim_category_priority() {
     failed.status = TaskStatus::Gate1Failed {
         report: GateReport {
             level: GateLevel::Quality,
-            checks: vec![CheckResult {
-                name: "test".into(),
-                passed: false,
-                stdout: String::new(),
-                stderr: "fail".into(),
-                exit_code: 1,
-            }],
+            checks: vec![CheckResult::simple("test", false, "", "fail", 1)],
             passed: false,
             duration_secs: 0.5,
         },
diff --git a/crates/thrum-runner/Cargo.toml b/crates/thrum-runner/Cargo.toml
index 1a2c020..9cfbbd1 100644
--- a/crates/thrum-runner/Cargo.toml
+++ b/crates/thrum-runner/Cargo.toml
@@ -10,8 +10,6 @@ thrum-db = { workspace = true }
 tokio = { workspace = true }
 tokio-util = { workspace = true }
 async-trait = { workspace = true }
-reqwest = { workspace = true }
-async-openai = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 git2 = { workspace = true }
@@ -22,6 +20,7 @@ anyhow = { workspace = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 chrono = { workspace = true }
+libc = { workspace = true }
 bollard = { workspace = true }
 toml = { workspace = true }
 futures-util = { workspace = true }
diff --git a/crates/thrum-runner/src/anthropic.rs b/crates/thrum-runner/src/anthropic.rs
deleted file mode 100644
index c0b1ee4..0000000
--- a/crates/thrum-runner/src/anthropic.rs
+++ /dev/null
@@ -1,193 +0,0 @@
-//! Anthropic Messages API backend (direct HTTP, no CLI needed).
-//!
-//! Uses reqwest against `https://api.anthropic.com/v1/messages`.
-//! Chat-only: returns text, cannot edit files or run commands.
-//! Good for reviews, planning, and headless/CI operation.
-
-use crate::backend::{AiBackend, AiRequest, AiResponse, BackendCapability};
-use anyhow::{Context, Result};
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-
-const ANTHROPIC_API_URL: &str = "https://api.anthropic.com/v1/messages";
-const ANTHROPIC_VERSION: &str = "2023-06-01";
-
-/// Anthropic Messages API backend.
-pub struct AnthropicApiBackend {
-    client: reqwest::Client,
-    api_key: String,
-    model: String,
-    max_tokens: u32,
-}
-
-impl AnthropicApiBackend {
-    /// Create from API key and model.
-    /// Model examples: "claude-sonnet-4-5-20250929", "claude-opus-4-6", "claude-haiku-4-5-20251001"
-    pub fn new(api_key: String, model: String) -> Self {
-        Self {
-            client: reqwest::Client::new(),
-            api_key,
-            model,
-            max_tokens: 4096,
-        }
-    }
-
-    /// Create from environment variable `ANTHROPIC_API_KEY`.
-    pub fn from_env(model: &str) -> Result<Self> {
-        let api_key = std::env::var("ANTHROPIC_API_KEY").context("ANTHROPIC_API_KEY not set")?;
-        Ok(Self::new(api_key, model.to_string()))
-    }
-
-    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
-        self.max_tokens = max_tokens;
-        self
-    }
-}
-
-#[async_trait]
-impl AiBackend for AnthropicApiBackend {
-    fn name(&self) -> &str {
-        "anthropic-api"
-    }
-
-    fn capability(&self) -> BackendCapability {
-        BackendCapability::Chat
-    }
-
-    fn model(&self) -> &str {
-        &self.model
-    }
-
-    async fn invoke(&self, request: &AiRequest) -> Result<AiResponse> {
-        let max_tokens = request.max_tokens.unwrap_or(self.max_tokens);
-
-        let messages = vec![Message {
-            role: "user".into(),
-            content: request.prompt.clone(),
-        }];
-
-        let body = MessagesRequest {
-            model: self.model.clone(),
-            max_tokens,
-            system: request.system_prompt.clone(),
-            messages,
-        };
-
-        tracing::info!(
-            model = %self.model,
-            prompt_len = request.prompt.len(),
-            "invoking Anthropic Messages API"
-        );
-
-        let response = self
-            .client
-            .post(ANTHROPIC_API_URL)
-            .header("x-api-key", &self.api_key)
-            .header("anthropic-version", ANTHROPIC_VERSION)
-            .header("content-type", "application/json")
-            .json(&body)
-            .send()
-            .await
-            .context("failed to send Anthropic API request")?;
-
-        if !response.status().is_success() {
-            let status = response.status();
-            let body = response.text().await.unwrap_or_default();
-            anyhow::bail!("Anthropic API error ({status}): {body}");
-        }
-
-        let resp: MessagesResponse = response
-            .json()
-            .await
-            .context("failed to parse Anthropic response")?;
-
-        let content = resp
-            .content
-            .iter()
-            .filter_map(|block| {
-                if block.block_type == "text" {
-                    block.text.as_deref()
-                } else {
-                    None
-                }
-            })
-            .collect::<Vec<_>>()
-            .join("\n");
-
-        Ok(AiResponse {
-            content,
-            model: resp.model,
-            input_tokens: Some(resp.usage.input_tokens),
-            output_tokens: Some(resp.usage.output_tokens),
-            timed_out: false,
-            exit_code: None,
-            session_id: None,
-        })
-    }
-
-    async fn health_check(&self) -> Result<()> {
-        // Minimal request to check API key validity
-        let body = MessagesRequest {
-            model: self.model.clone(),
-            max_tokens: 1,
-            system: None,
-            messages: vec![Message {
-                role: "user".into(),
-                content: "ping".into(),
-            }],
-        };
-
-        let response = self
-            .client
-            .post(ANTHROPIC_API_URL)
-            .header("x-api-key", &self.api_key)
-            .header("anthropic-version", ANTHROPIC_VERSION)
-            .header("content-type", "application/json")
-            .json(&body)
-            .send()
-            .await?;
-
-        if response.status().is_success() {
-            Ok(())
-        } else {
-            anyhow::bail!("Anthropic API health check failed: {}", response.status())
-        }
-    }
-}
-
-// ─── API types ─────────────────────────────────────────────────────────
-
-#[derive(Serialize)]
-struct MessagesRequest {
-    model: String,
-    max_tokens: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    system: Option<String>,
-    messages: Vec<Message>,
-}
-
-#[derive(Serialize)]
-struct Message {
-    role: String,
-    content: String,
-}
-
-#[derive(Deserialize)]
-struct MessagesResponse {
-    model: String,
-    content: Vec<ContentBlock>,
-    usage: Usage,
-}
-
-#[derive(Deserialize)]
-struct ContentBlock {
-    #[serde(rename = "type")]
-    block_type: String,
-    text: Option<String>,
-}
-
-#[derive(Deserialize)]
-struct Usage {
-    input_tokens: u64,
-    output_tokens: u64,
-}
diff --git a/crates/thrum-runner/src/backend.rs b/crates/thrum-runner/src/backend.rs
deleted file mode 100644
index 3fe66da..0000000
--- a/crates/thrum-runner/src/backend.rs
+++ /dev/null
@@ -1,564 +0,0 @@
-//! AI backend abstraction for multi-provider support.
-//!
-//! Two categories of backends:
-//! - **Agent backends** (CLI-based): Can edit files, run commands, use git.
-//!   Examples: Claude Code CLI, Vibe, OpenCode.
-//! - **Chat backends** (API-based): Return text responses only.
-//!   Examples: Anthropic Messages API, Mistral/Devstral2 via OpenAI-compat.
-//!
-//! Agent backends are preferred for implementation tasks.
-//! Chat backends are used for reviews, planning, and headless operation.
-
-use anyhow::Result;
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use std::path::PathBuf;
-
-/// Capability level of a backend.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum BackendCapability {
-    /// Full agent: can edit files, run terminal commands, use git.
-    /// Invoked via CLI (e.g., `claude -p`, `vibe`, `opencode`).
-    Agent,
-    /// Chat only: returns text responses. No file/terminal access.
-    /// Invoked via HTTP API.
-    Chat,
-}
-
-/// Result from an AI backend invocation.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AiResponse {
-    /// The text output from the AI.
-    pub content: String,
-    /// Model used (e.g., "claude-opus-4-6", "devstral-small-2505").
-    pub model: String,
-    /// Input tokens consumed.
-    pub input_tokens: Option<u64>,
-    /// Output tokens produced.
-    pub output_tokens: Option<u64>,
-    /// Whether the invocation timed out.
-    pub timed_out: bool,
-    /// Exit code (for CLI-based backends).
-    pub exit_code: Option<i32>,
-    /// Session ID from the backend, used for session continuation on retries.
-    /// Claude Code returns this in its JSON output; OpenCode tracks it internally.
-    #[serde(default)]
-    pub session_id: Option<String>,
-}
-
-/// Configuration for an AI invocation.
-#[derive(Debug, Clone)]
-pub struct AiRequest {
-    /// The prompt to send.
-    pub prompt: String,
-    /// System prompt / instructions.
-    pub system_prompt: Option<String>,
-    /// Working directory (for agent backends).
-    pub cwd: Option<PathBuf>,
-    /// Maximum tokens to generate.
-    pub max_tokens: Option<u32>,
-    /// Temperature (0.0 - 1.0).
-    pub temperature: Option<f32>,
-    /// Session ID from a previous invocation, used to resume the session.
-    /// Claude Code uses `--resume {id}`, OpenCode uses `-s {id}`.
-    pub resume_session_id: Option<String>,
-}
-
-impl AiRequest {
-    pub fn new(prompt: impl Into<String>) -> Self {
-        Self {
-            prompt: prompt.into(),
-            system_prompt: None,
-            cwd: None,
-            max_tokens: None,
-            temperature: None,
-            resume_session_id: None,
-        }
-    }
-
-    pub fn with_system(mut self, system: impl Into<String>) -> Self {
-        self.system_prompt = Some(system.into());
-        self
-    }
-
-    pub fn with_cwd(mut self, cwd: PathBuf) -> Self {
-        self.cwd = Some(cwd);
-        self
-    }
-
-    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
-        self.max_tokens = Some(max_tokens);
-        self
-    }
-
-    pub fn with_resume_session(mut self, session_id: String) -> Self {
-        self.resume_session_id = Some(session_id);
-        self
-    }
-}
-
-/// Trait for all AI backends (both agent and chat).
-#[async_trait]
-pub trait AiBackend: Send + Sync {
-    /// Human-readable name of this backend.
-    fn name(&self) -> &str;
-
-    /// What this backend can do.
-    fn capability(&self) -> BackendCapability;
-
-    /// Model identifier used by this backend.
-    fn model(&self) -> &str;
-
-    /// Invoke the AI with a request.
-    async fn invoke(&self, request: &AiRequest) -> Result<AiResponse>;
-
-    /// Check if the backend is available (e.g., API key set, CLI installed).
-    async fn health_check(&self) -> Result<()>;
-}
-
-/// Registry of available backends with routing logic.
-///
-/// Backends can be registered programmatically or built from `[[backends]]` config
-/// in pipeline.toml, enabling any coding agent to be swapped in without code changes.
-pub struct BackendRegistry {
-    backends: Vec<Box<dyn AiBackend>>,
-}
-
-impl BackendRegistry {
-    pub fn new() -> Self {
-        Self {
-            backends: Vec::new(),
-        }
-    }
-
-    pub fn register(&mut self, backend: Box<dyn AiBackend>) {
-        self.backends.push(backend);
-    }
-
-    /// Get the best agent backend (for implementation tasks).
-    pub fn agent(&self) -> Option<&dyn AiBackend> {
-        self.backends
-            .iter()
-            .find(|b| b.capability() == BackendCapability::Agent)
-            .map(|b| b.as_ref())
-    }
-
-    /// Get the best chat backend (for reviews, planning).
-    pub fn chat(&self) -> Option<&dyn AiBackend> {
-        self.backends
-            .iter()
-            .find(|b| b.capability() == BackendCapability::Chat)
-            .map(|b| b.as_ref())
-    }
-
-    /// Get a specific backend by name.
-    pub fn get(&self, name: &str) -> Option<&dyn AiBackend> {
-        self.backends
-            .iter()
-            .find(|b| b.name() == name)
-            .map(|b| b.as_ref())
-    }
-
-    /// Resolve a role's backend preference to an actual registered backend.
-    ///
-    /// Resolution order:
-    /// 1. Exact match by name (e.g., role.backend = "claude-code" → backend named "claude-code")
-    /// 2. Model substring match (e.g., role.backend = "opus" → backend whose model contains "opus")
-    /// 3. Capability fallback (implementer needs Agent, reviewer needs Chat)
-    pub fn resolve_role(&self, role: &thrum_core::role::AgentRole) -> Option<&dyn AiBackend> {
-        let query = &role.backend;
-
-        // 1. Exact name match
-        if let Some(b) = self.get(query) {
-            return Some(b);
-        }
-
-        // 2. Model substring match (case-insensitive)
-        let query_lower = query.to_lowercase();
-        if let Some(b) = self
-            .backends
-            .iter()
-            .find(|b| b.model().to_lowercase().contains(&query_lower))
-        {
-            return Some(b.as_ref());
-        }
-
-        // 3. Capability-based fallback: agent backends for "opus"/"haiku", chat for "sonnet"
-        let prefer_chat = query_lower.contains("sonnet") || query_lower.contains("haiku");
-        if prefer_chat {
-            self.chat().or_else(|| self.agent())
-        } else {
-            self.agent().or_else(|| self.chat())
-        }
-    }
-
-    /// List all registered backends.
-    pub fn list(&self) -> Vec<(&str, BackendCapability, &str)> {
-        self.backends
-            .iter()
-            .map(|b| (b.name(), b.capability(), b.model()))
-            .collect()
-    }
-
-    /// Number of registered backends.
-    pub fn len(&self) -> usize {
-        self.backends.len()
-    }
-
-    /// Whether the registry is empty.
-    pub fn is_empty(&self) -> bool {
-        self.backends.is_empty()
-    }
-}
-
-impl Default for BackendRegistry {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Build a `BackendRegistry` from declarative `[[backends]]` config entries.
-///
-/// This is the key to backend-agnostic operation: any coding agent that accepts
-/// a prompt and returns output can be configured without code changes.
-pub fn build_registry_from_config(
-    configs: &[thrum_core::role::BackendConfig],
-    default_cwd: &std::path::Path,
-) -> Result<BackendRegistry> {
-    let mut registry = BackendRegistry::new();
-
-    for cfg in configs {
-        if !cfg.enabled {
-            tracing::debug!(name = %cfg.name, "skipping disabled backend");
-            continue;
-        }
-
-        let timeout = std::time::Duration::from_secs(cfg.timeout_secs.unwrap_or(1200));
-
-        match cfg.backend_type.as_str() {
-            "agent" => {
-                // Special case: "claude" command uses the dedicated ClaudeCliBackend
-                // for its JSON output parsing. Everything else uses CliAgentBackend.
-                if cfg.command.as_deref() == Some("claude") {
-                    let mut backend =
-                        crate::claude::ClaudeCliBackend::new(default_cwd.to_path_buf());
-                    backend.timeout = timeout;
-                    backend.skip_permissions = true; // Required for non-interactive automation
-                    registry.register(Box::new(backend));
-                } else if let Some(ref command) = cfg.command {
-                    let prompt_args = cfg
-                        .prompt_args
-                        .clone()
-                        .unwrap_or_else(|| vec!["-m".into(), "{prompt}".into()]);
-                    // Infer session flag from known tools
-                    let session_flag = match command.as_str() {
-                        "opencode" => Some("-s".into()),
-                        _ => None,
-                    };
-                    let backend = crate::cli_agent::CliAgentBackend {
-                        name: cfg.name.clone(),
-                        command: command.clone(),
-                        prompt_args,
-                        model_name: cfg.model.clone().unwrap_or_else(|| "unknown".into()),
-                        default_cwd: default_cwd.to_path_buf(),
-                        timeout,
-                        session_flag,
-                    };
-                    registry.register(Box::new(backend));
-                } else {
-                    tracing::warn!(name = %cfg.name, "agent backend missing 'command' field, skipping");
-                }
-            }
-            "chat" => {
-                let provider = cfg.provider.as_deref().unwrap_or("anthropic");
-                let model = cfg.model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
-                let api_key_env = cfg.api_key_env.as_deref().unwrap_or(match provider {
-                    "anthropic" => "ANTHROPIC_API_KEY",
-                    "mistral" => "MISTRAL_API_KEY",
-                    "openai" => "OPENAI_API_KEY",
-                    _ => "OPENAI_API_KEY",
-                });
-
-                match std::env::var(api_key_env) {
-                    Ok(api_key) => match provider {
-                        "anthropic" => {
-                            let backend = crate::anthropic::AnthropicApiBackend::new(
-                                api_key,
-                                model.to_string(),
-                            );
-                            registry.register(Box::new(backend));
-                        }
-                        "mistral" => {
-                            let backend = crate::openai_compat::OpenAiCompatBackend::new(
-                                crate::openai_compat::Provider::Mistral,
-                                api_key,
-                                model.to_string(),
-                            );
-                            registry.register(Box::new(backend));
-                        }
-                        "openai" => {
-                            let backend = crate::openai_compat::OpenAiCompatBackend::new(
-                                crate::openai_compat::Provider::OpenAi,
-                                api_key,
-                                model.to_string(),
-                            );
-                            registry.register(Box::new(backend));
-                        }
-                        _ => {
-                            let base_url = cfg
-                                .base_url
-                                .clone()
-                                .unwrap_or_else(|| "https://api.openai.com/v1".into());
-                            let backend = crate::openai_compat::OpenAiCompatBackend::new(
-                                crate::openai_compat::Provider::Custom { base_url },
-                                api_key,
-                                model.to_string(),
-                            );
-                            registry.register(Box::new(backend));
-                        }
-                    },
-                    Err(_) => {
-                        tracing::debug!(
-                            name = %cfg.name,
-                            env = api_key_env,
-                            "chat backend API key not set, skipping"
-                        );
-                    }
-                }
-            }
-            other => {
-                tracing::warn!(name = %cfg.name, backend_type = other, "unknown backend type, skipping");
-            }
-        }
-    }
-
-    Ok(registry)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use thrum_core::role::AgentRole;
-
-    /// A mock backend for testing routing logic without real CLI/API calls.
-    struct MockBackend {
-        mock_name: String,
-        mock_model: String,
-        mock_capability: BackendCapability,
-    }
-
-    impl MockBackend {
-        fn agent(name: &str, model: &str) -> Box<dyn AiBackend> {
-            Box::new(Self {
-                mock_name: name.into(),
-                mock_model: model.into(),
-                mock_capability: BackendCapability::Agent,
-            })
-        }
-
-        fn chat(name: &str, model: &str) -> Box<dyn AiBackend> {
-            Box::new(Self {
-                mock_name: name.into(),
-                mock_model: model.into(),
-                mock_capability: BackendCapability::Chat,
-            })
-        }
-    }
-
-    #[async_trait]
-    impl AiBackend for MockBackend {
-        fn name(&self) -> &str {
-            &self.mock_name
-        }
-        fn capability(&self) -> BackendCapability {
-            self.mock_capability
-        }
-        fn model(&self) -> &str {
-            &self.mock_model
-        }
-        async fn invoke(&self, _request: &AiRequest) -> Result<AiResponse> {
-            Ok(AiResponse {
-                content: format!("mock response from {}", self.mock_name),
-                model: self.mock_model.clone(),
-                input_tokens: None,
-                output_tokens: None,
-                timed_out: false,
-                exit_code: None,
-                session_id: None,
-            })
-        }
-        async fn health_check(&self) -> Result<()> {
-            Ok(())
-        }
-    }
-
-    fn make_role(backend: &str) -> AgentRole {
-        AgentRole {
-            backend: backend.into(),
-            prompt_template: "agents/test.md".into(),
-            budget_usd: Some(1.0),
-            timeout_secs: Some(60),
-        }
-    }
-
-    /// Build a registry with multiple backends simulating a real multi-provider setup.
-    fn multi_provider_registry() -> BackendRegistry {
-        let mut reg = BackendRegistry::new();
-        reg.register(MockBackend::agent("claude-code", "claude-opus-4-6"));
-        reg.register(MockBackend::agent("opencode", "devstral-small-2505"));
-        reg.register(MockBackend::chat(
-            "anthropic-api",
-            "claude-sonnet-4-5-20250929",
-        ));
-        reg.register(MockBackend::chat("mistral-api", "devstral-small-2505"));
-        reg
-    }
-
-    #[test]
-    fn resolve_role_exact_name() {
-        let reg = multi_provider_registry();
-        let role = make_role("opencode");
-        let backend = reg.resolve_role(&role).unwrap();
-        assert_eq!(backend.name(), "opencode");
-    }
-
-    #[test]
-    fn resolve_role_model_substring() {
-        let reg = multi_provider_registry();
-        // "opus" should match "claude-opus-4-6"
-        let role = make_role("opus");
-        let backend = reg.resolve_role(&role).unwrap();
-        assert_eq!(backend.name(), "claude-code");
-    }
-
-    #[test]
-    fn resolve_role_sonnet_prefers_chat() {
-        let reg = multi_provider_registry();
-        let role = make_role("sonnet");
-        let backend = reg.resolve_role(&role).unwrap();
-        // "sonnet" substring matches the chat backend's model
-        assert_eq!(backend.name(), "anthropic-api");
-    }
-
-    #[test]
-    fn resolve_role_unknown_falls_back_to_agent() {
-        let reg = multi_provider_registry();
-        let role = make_role("some-unknown-backend");
-        let backend = reg.resolve_role(&role).unwrap();
-        // Falls back to first agent
-        assert_eq!(backend.capability(), BackendCapability::Agent);
-    }
-
-    #[test]
-    fn registry_basic_ops() {
-        let reg = multi_provider_registry();
-        assert_eq!(reg.len(), 4);
-        assert!(!reg.is_empty());
-        assert!(reg.agent().is_some());
-        assert!(reg.chat().is_some());
-        assert!(reg.get("mistral-api").is_some());
-        assert!(reg.get("nonexistent").is_none());
-    }
-
-    #[test]
-    fn config_driven_agent_backends() {
-        let configs = vec![
-            thrum_core::role::BackendConfig {
-                name: "claude-code".into(),
-                backend_type: "agent".into(),
-                command: Some("claude".into()),
-                prompt_args: Some(vec!["-p".into(), "{prompt}".into()]),
-                model: Some("claude-opus-4-6".into()),
-                provider: None,
-                base_url: None,
-                api_key_env: None,
-                timeout_secs: Some(300),
-                enabled: true,
-            },
-            thrum_core::role::BackendConfig {
-                name: "opencode".into(),
-                backend_type: "agent".into(),
-                command: Some("opencode".into()),
-                prompt_args: None,
-                model: Some("devstral-small-2505".into()),
-                provider: None,
-                base_url: None,
-                api_key_env: None,
-                timeout_secs: None,
-                enabled: true,
-            },
-            thrum_core::role::BackendConfig {
-                name: "disabled-agent".into(),
-                backend_type: "agent".into(),
-                command: Some("should-not-appear".into()),
-                prompt_args: None,
-                model: None,
-                provider: None,
-                base_url: None,
-                api_key_env: None,
-                timeout_secs: None,
-                enabled: false,
-            },
-        ];
-
-        let cwd = std::env::temp_dir();
-        let registry = build_registry_from_config(&configs, &cwd).unwrap();
-
-        // Two enabled agents registered (disabled one skipped)
-        assert_eq!(registry.len(), 2);
-        assert!(registry.get("claude-code").is_some() || registry.agent().is_some());
-        // Disabled backend should not appear
-        assert!(registry.get("disabled-agent").is_none());
-    }
-
-    #[test]
-    fn config_driven_chat_backends_skip_without_key() {
-        // Chat backends without API keys should be silently skipped
-        let configs = vec![thrum_core::role::BackendConfig {
-            name: "no-key-api".into(),
-            backend_type: "chat".into(),
-            command: None,
-            prompt_args: None,
-            model: Some("gpt-4o".into()),
-            provider: Some("openai".into()),
-            base_url: None,
-            api_key_env: Some("NONEXISTENT_API_KEY_FOR_TEST".into()),
-            timeout_secs: None,
-            enabled: true,
-        }];
-
-        let cwd = std::env::temp_dir();
-        let registry = build_registry_from_config(&configs, &cwd).unwrap();
-        assert_eq!(registry.len(), 0); // No backends registered without API key
-    }
-
-    #[test]
-    fn empty_config_produces_empty_registry() {
-        let cwd = std::env::temp_dir();
-        let registry = build_registry_from_config(&[], &cwd).unwrap();
-        assert!(registry.is_empty());
-    }
-
-    /// Proves the complete swappability story: roles resolve correctly
-    /// when the underlying backends change from Claude to OpenCode/Copilot.
-    #[test]
-    fn backend_swap_scenario() {
-        // Scenario: User switches from Claude to OpenCode as primary agent
-        let mut reg = BackendRegistry::new();
-        reg.register(MockBackend::agent("opencode", "devstral-small-2505"));
-        reg.register(MockBackend::chat("mistral-api", "devstral-small-2505"));
-
-        let implementer = make_role("devstral"); // model substring
-
-        let impl_backend = reg.resolve_role(&implementer).unwrap();
-        // "devstral" matches both backends. resolve_role prefers agent for non-sonnet/haiku.
-        assert_eq!(impl_backend.capability(), BackendCapability::Agent);
-        assert_eq!(impl_backend.name(), "opencode");
-
-        // For reviewer, if we want to explicitly use chat, reference by name
-        let reviewer_role = make_role("mistral-api");
-        let rev_backend = reg.resolve_role(&reviewer_role).unwrap();
-        assert_eq!(rev_backend.name(), "mistral-api");
-        assert_eq!(rev_backend.capability(), BackendCapability::Chat);
-    }
-}
diff --git a/crates/thrum-runner/src/ci.rs b/crates/thrum-runner/src/ci.rs
new file mode 100644
index 0000000..5924d85
--- /dev/null
+++ b/crates/thrum-runner/src/ci.rs
@@ -0,0 +1,1209 @@
+//! CI status polling and failure recovery.
+//!
+//! Polls GitHub CI status via `gh pr checks` and handles pass/fail.
+//! On CI failure, dispatches a ci_fixer agent to fix and re-push.
+//! Tracks CI attempts and escalates to human review after max retries.
+
+use crate::event_bus::EventBus;
+use anyhow::{Context, Result};
+use std::path::Path;
+use std::process::Command;
+use std::time::Duration;
+use thrum_core::ci::{CICheck, CIPollResult, CIStatus};
+use thrum_core::event::EventKind;
+use thrum_core::task::{RepoName, Task, TaskId, TaskStatus};
+use thrum_db::task_store::TaskStore;
+
+/// Poll CI status for a PR using `gh pr checks`.
+///
+/// Returns the aggregated CI status, the individual checks, and a count summary.
+pub fn poll_ci_status(repo_path: &Path, pr_number: u64) -> Result<CIPollResult> {
+    let output = Command::new("gh")
+        .args([
+            "pr",
+            "checks",
+            &pr_number.to_string(),
+            "--json",
+            "name,state,detailsUrl",
+        ])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to run `gh pr checks`")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        // If no checks are configured, gh may fail
+        if stderr.contains("no checks") || stderr.contains("no status checks") {
+            return Ok(CIPollResult {
+                status: CIStatus::NoChecks,
+                checks: Vec::new(),
+                summary: "No CI checks configured for this PR".into(),
+            });
+        }
+        anyhow::bail!("gh pr checks failed: {stderr}");
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let checks: Vec<GhCheck> =
+        serde_json::from_str(&stdout).context("failed to parse gh pr checks output")?;
+
+    if checks.is_empty() {
+        return Ok(CIPollResult {
+            status: CIStatus::NoChecks,
+            checks: Vec::new(),
+            summary: "No CI checks found".into(),
+        });
+    }
+
+    let ci_checks: Vec<CICheck> = checks
+        .iter()
+        .map(|c| CICheck {
+            name: c.name.clone(),
+            status: c.state.to_lowercase(),
+            url: c.details_url.clone(),
+        })
+        .collect();
+
+    // Classify each state exactly once so that the aggregate status and the
+    // summary counts cannot disagree: a cancelled check is a failure in both,
+    // and a state in none of the buckets is only reflected in the total.
+    let is_pending = |s: &str| matches!(s, "pending" | "queued" | "in_progress" | "waiting");
+    let is_failed = |s: &str| matches!(s, "failure" | "error" | "cancelled");
+
+    let passed = ci_checks.iter().filter(|c| c.status == "success").count();
+    let failed = ci_checks
+        .iter()
+        .filter(|c| is_failed(c.status.as_str()))
+        .count();
+    let pending = ci_checks
+        .iter()
+        .filter(|c| is_pending(c.status.as_str()))
+        .count();
+
+    // Pending outranks failed: the result is not final while a check still runs.
+    let status = if pending > 0 {
+        CIStatus::Pending
+    } else if failed > 0 {
+        CIStatus::Fail
+    } else {
+        CIStatus::Pass
+    };
+
+    let summary = format!(
+        "{passed} passed, {failed} failed, {pending} pending (total: {})",
+        ci_checks.len()
+    );
+
+    Ok(CIPollResult {
+        status,
+        checks: ci_checks,
+        summary,
+    })
+}
+
+/// Merge a PR via `gh pr merge --delete-branch`; unknown strategies fall back to squash.
+pub fn merge_pr(repo_path: &Path, pr_number: u64, strategy: &str) -> Result<String> {
+    let strategy_flag = match strategy {
+        "squash" => "--squash",
+        "rebase" => "--rebase",
+        "merge" => "--merge",
+        _ => "--squash",
+    };
+
+    let output = Command::new("gh")
+        .args([
+            "pr",
+            "merge",
+            &pr_number.to_string(),
+            strategy_flag,
+            "--delete-branch",
+        ])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to run `gh pr merge`")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("gh pr merge failed: {stderr}");
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    Ok(stdout)
+}
+
+/// Get a PR's merge commit SHA from `gh pr view`, falling back to the local `HEAD`.
+pub fn get_pr_merge_sha(repo_path: &Path, pr_number: u64) -> Result<String> {
+    let output = Command::new("gh")
+        .args([
+            "pr",
+            "view",
+            &pr_number.to_string(),
+            "--json",
+            "mergeCommit",
+            "-q",
+            ".mergeCommit.oid",
+        ])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to get merge commit SHA")?;
+
+    let sha = String::from_utf8_lossy(&output.stdout).trim().to_string();
+    if sha.is_empty() {
+        // Fallback: HEAD of whatever is checked out in `repo_path` (exit status unchecked)
+        let head_output = Command::new("git")
+            .args(["rev-parse", "HEAD"])
+            .current_dir(repo_path)
+            .output()
+            .context("failed to get HEAD sha")?;
+        Ok(String::from_utf8_lossy(&head_output.stdout)
+            .trim()
+            .to_string())
+    } else {
+        Ok(sha)
+    }
+}
+
+/// Summarize failed PR checks and append logs from `gh run view --log-failed`.
+pub fn get_ci_failure_logs(repo_path: &Path, pr_number: u64) -> Result<String> {
+    let pr = pr_number.to_string();
+    // First, get the PR's checks so the failed ones can be listed
+    let output = Command::new("gh")
+        .args(["pr", "checks", &pr, "--json", "name,state,detailsUrl"])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to get PR checks")?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let checks: Vec<GhCheck> = serde_json::from_str(&stdout).unwrap_or_default();
+
+    let failed_checks: Vec<&GhCheck> = checks
+        .iter()
+        .filter(|c| {
+            let s = c.state.to_lowercase();
+            s == "failure" || s == "error"
+        })
+        .collect();
+
+    if failed_checks.is_empty() {
+        return Ok("No failed checks found.".into());
+    }
+
+    // Build a summary of failed checks
+    let mut logs = format!(
+        "## CI Failure Summary ({} failed check(s))\n\n",
+        failed_checks.len()
+    );
+
+    for check in &failed_checks {
+        logs.push_str(&format!("### {} ({})\n", check.name, check.state));
+        if let Some(url) = &check.details_url {
+            logs.push_str(&format!("URL: {url}\n"));
+        }
+        logs.push('\n');
+    }
+
+    // `gh run list --branch` needs a branch name, so resolve the PR's head branch.
+    // Detailed logs are best effort: without a branch, return the summary alone.
+    let head_branch = Command::new("gh")
+        .args(["pr", "view", &pr, "--json", "headRefName"])
+        .current_dir(repo_path)
+        .output()
+        .ok()
+        .filter(|out| out.status.success())
+        .and_then(|out| serde_json::from_slice::<serde_json::Value>(&out.stdout).ok())
+        .and_then(|pr_info| pr_info.get("headRefName")?.as_str().map(str::to_owned));
+    let Some(head_branch) = head_branch else {
+        return Ok(logs);
+    };
+
+    // Try to get detailed logs from the most recent run on that branch
+    let run_output = Command::new("gh")
+        .args([
+            "run",
+            "list",
+            "--branch",
+            &head_branch,
+            "--json",
+            "databaseId,status,conclusion",
+            "--limit",
+            "1",
+        ])
+        .current_dir(repo_path)
+        .output();
+
+    if let Ok(run_out) = run_output
+        && run_out.status.success()
+        && let Ok(runs) = serde_json::from_slice::<Vec<serde_json::Value>>(&run_out.stdout)
+        && let Some(run_id) = runs.first().and_then(|run| run.get("databaseId")?.as_u64())
+        && let Ok(log_out) = Command::new("gh")
+            .args(["run", "view", &run_id.to_string(), "--log-failed"])
+            .current_dir(repo_path)
+            .output()
+        && log_out.status.success()
+    {
+        let log_text = String::from_utf8_lossy(&log_out.stdout);
+        // Truncate to a reasonable size for the agent
+        let truncated: String = log_text.chars().take(10000).collect();
+        logs.push_str("## Failed Run Logs\n\n```\n");
+        logs.push_str(&truncated);
+        if truncated.len() < log_text.len() {
+            logs.push_str("\n... (truncated)");
+        }
+        logs.push_str("\n```\n");
+    }
+
+    Ok(logs)
+}
+
+/// Push a branch to `origin` with upstream tracking, force-pushing (with lease) if rejected.
+pub fn push_branch(repo_path: &Path, branch: &str) -> Result<()> {
+    let output = Command::new("git")
+        .args(["push", "-u", "origin", branch])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to push branch")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        // A rejected / non-fast-forward push is retried once with --force-with-lease
+        if stderr.contains("rejected") || stderr.contains("non-fast-forward") {
+            let force_output = Command::new("git")
+                .args(["push", "--force-with-lease", "-u", "origin", branch])
+                .current_dir(repo_path)
+                .output()
+                .context("failed to force-push branch")?;
+
+            if !force_output.status.success() {
+                let stderr2 = String::from_utf8_lossy(&force_output.stderr);
+                anyhow::bail!("git push failed: {stderr2}");
+            }
+        } else {
+            anyhow::bail!("git push failed: {stderr}");
+        }
+    }
+
+    Ok(())
+}
+
+/// Create a PR via `gh pr create`.
+///
+/// If `gh` reports that a PR already exists, the existing PR for `branch` is
+/// looked up and returned instead of failing.
+///
+/// Returns (pr_number, pr_url); the number is taken from the PR URL that
+/// `gh pr create` prints on success.
+///
+/// Errors if `gh` cannot be run, fails for any other reason, or prints no URL
+/// ending in a numeric PR number.
+pub fn create_pr(repo_path: &Path, branch: &str, title: &str, body: &str) -> Result<(u64, String)> {
+    let output = Command::new("gh")
+        .args([
+            "pr", "create", "--head", branch, "--title", title, "--body", body,
+        ])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to run `gh pr create`")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        // Check if PR already exists
+        if stderr.contains("already exists") {
+            // Get existing PR info
+            return get_existing_pr(repo_path, branch);
+        }
+        anyhow::bail!("gh pr create failed: {stderr}");
+    }
+
+    // `gh pr create` has no `--json` flag; on success it prints the new PR's
+    // URL on stdout, and the PR number is that URL's final path segment.
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let pr_url = stdout
+        .lines()
+        .rev()
+        .map(str::trim)
+        .find(|line| !line.is_empty())
+        .context("gh pr create printed no PR URL")?
+        .to_string();
+    let pr_number = pr_url
+        .rsplit('/')
+        .next()
+        .and_then(|segment| segment.parse::<u64>().ok())
+        .context("missing PR number in gh pr create output")?;
+
+    Ok((pr_number, pr_url))
+}
+
+/// Look up the existing PR for a branch via `gh pr view`. Returns (pr_number, pr_url).
+fn get_existing_pr(repo_path: &Path, branch: &str) -> Result<(u64, String)> {
+    let output = Command::new("gh")
+        .args(["pr", "view", branch, "--json", "number,url"])
+        .current_dir(repo_path)
+        .output()
+        .context("failed to get existing PR")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("failed to find existing PR for branch {branch}: {stderr}");
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let pr: serde_json::Value = serde_json::from_str(&stdout).context("failed to parse PR info")?;
+
+    let pr_number = pr
+        .get("number")
+        .and_then(|v| v.as_u64())
+        .context("missing PR number")?;
+    let pr_url = pr
+        .get("url")
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string();
+
+    Ok((pr_number, pr_url))
+}
+
+/// Poll CI status in a loop until it is no longer pending, or the poll budget runs out.
+///
+/// Emits a `CICheckUpdate` event after every poll. An exhausted budget is
+/// reported as `Ok` with `CIStatus::Fail` and a timeout summary.
+pub async fn poll_ci_until_complete(
+    repo_path: &Path,
+    task_id: &TaskId,
+    repo: &RepoName,
+    pr_number: u64,
+    poll_interval: Duration,
+    event_bus: &EventBus,
+) -> Result<CIPollResult> {
+    // Poll budget: as many polls as fit into 1 hour of sleeping at `poll_interval`
+    let max_polls = 3600 / poll_interval.as_secs().max(1);
+    let mut poll_count = 0u64;
+
+    loop {
+        poll_count += 1;
+        if poll_count > max_polls {
+            return Ok(CIPollResult {
+                status: CIStatus::Fail,
+                checks: Vec::new(),
+                summary: "CI polling timed out after 1 hour".into(),
+            });
+        }
+
+        let result = poll_ci_status(repo_path, pr_number)?;
+
+        event_bus.emit(EventKind::CICheckUpdate {
+            task_id: task_id.clone(),
+            repo: repo.clone(),
+            pr_number,
+            status: result.status.to_string(),
+            summary: result.summary.clone(),
+        });
+
+        match result.status {
+            CIStatus::Pending => {
+                tracing::debug!(
+                    task_id = %task_id,
+                    pr_number,
+                    poll = poll_count,
+                    summary = %result.summary,
+                    "CI still pending, waiting..."
+                );
+                tokio::time::sleep(poll_interval).await;
+            }
+            CIStatus::Pass | CIStatus::Fail | CIStatus::NoChecks => {
+                return Ok(result);
+            }
+        }
+    }
+}
+
+/// Run one CI poll-and-react pass for a task in AwaitingCI status.
+///
+/// Main entry point called by the parallel engine. Polls CI until it settles,
+/// then merges the PR (pass / no checks, if auto-merge is on), dispatches the
+/// ci_fixer agent (failure, retries left), or marks the task `CIFailed`.
+#[allow(clippy::too_many_arguments)]
+pub async fn run_ci_loop(
+    task_store: &TaskStore<'_>,
+    event_bus: &EventBus,
+    repo_path: &Path,
+    agents_dir: &Path,
+    roles: Option<&thrum_core::role::RolesConfig>,
+    mut task: Task,
+) -> Result<()> {
+    let (
+        pr_number,
+        pr_url,
+        branch,
+        ci_attempts,
+        max_retries,
+        poll_interval,
+        auto_merge,
+        merge_strategy,
+    ) = match &task.status {
+        TaskStatus::AwaitingCI {
+            pr_number,
+            pr_url,
+            branch,
+            ci_attempts,
+            ..
+        } => {
+            // NOTE: only `CIConfig::default()` is used here; no per-repo config is read
+            let ci_config = thrum_core::repo::CIConfig::default();
+            (
+                *pr_number,
+                pr_url.clone(),
+                branch.clone(),
+                *ci_attempts,
+                ci_config.max_ci_retries,
+                Duration::from_secs(ci_config.poll_interval_secs),
+                ci_config.auto_merge,
+                ci_config.merge_strategy.clone(),
+            )
+        }
+        _ => {
+            tracing::warn!(
+                task_id = %task.id,
+                status = task.status.label(),
+                "run_ci_loop called on non-AwaitingCI task"
+            );
+            return Ok(());
+        }
+    };
+
+    tracing::info!(
+        task_id = %task.id,
+        pr_number,
+        pr_url = %pr_url,
+        ci_attempts,
+        "starting CI polling loop"
+    );
+
+    event_bus.emit(EventKind::CIPollingStarted {
+        task_id: task.id.clone(),
+        repo: task.repo.clone(),
+        pr_number,
+        pr_url: pr_url.clone(),
+    });
+
+    // Poll CI status
+    let result = poll_ci_until_complete(
+        repo_path,
+        &task.id,
+        &task.repo,
+        pr_number,
+        poll_interval,
+        event_bus,
+    )
+    .await?;
+
+    match result.status {
+        CIStatus::Pass | CIStatus::NoChecks => {
+            // CI passed (or there were no checks) — merge the PR if auto-merge is on
+            event_bus.emit(EventKind::CIPassed {
+                task_id: task.id.clone(),
+                repo: task.repo.clone(),
+                pr_number,
+            });
+
+            if auto_merge {
+                tracing::info!(
+                    task_id = %task.id,
+                    pr_number,
+                    strategy = %merge_strategy,
+                    "CI passed, merging PR"
+                );
+                merge_pr(repo_path, pr_number, &merge_strategy)?;
+
+                let commit_sha =
+                    get_pr_merge_sha(repo_path, pr_number).unwrap_or_else(|_| "pr-merged".into());
+
+                let old_label = task.status.label().to_string();
+                task.status = TaskStatus::Merged { commit_sha };
+                task.updated_at = chrono::Utc::now();
+                task_store.update(&task)?;
+
+                event_bus.emit(EventKind::TaskStateChange {
+                    task_id: task.id.clone(),
+                    repo: task.repo.clone(),
+                    from: old_label,
+                    to: "merged".into(),
+                });
+
+                tracing::info!(task_id = %task.id, "task merged via CI");
+            } else {
+                tracing::info!(
+                    task_id = %task.id,
+                    "CI passed but auto_merge disabled — task stays in awaiting-ci"
+                );
+            }
+        }
+        CIStatus::Fail => {
+            let current_attempt = ci_attempts + 1;
+
+            event_bus.emit(EventKind::CIFailed {
+                task_id: task.id.clone(),
+                repo: task.repo.clone(),
+                pr_number,
+                attempt: current_attempt,
+                max_attempts: max_retries,
+                failure_summary: result.summary.clone(),
+            });
+
+            if current_attempt > max_retries {
+                // Escalate to human review
+                tracing::warn!(
+                    task_id = %task.id,
+                    attempts = current_attempt,
+                    max_retries,
+                    "CI retries exhausted, escalating to human review"
+                );
+
+                event_bus.emit(EventKind::CIEscalated {
+                    task_id: task.id.clone(),
+                    repo: task.repo.clone(),
+                    pr_number,
+                    attempts: current_attempt,
+                    failure_summary: result.summary.clone(),
+                });
+
+                let old_label = task.status.label().to_string();
+                task.status = TaskStatus::CIFailed {
+                    pr_number,
+                    pr_url,
+                    failure_summary: result.summary,
+                    ci_attempts: current_attempt,
+                };
+                task.updated_at = chrono::Utc::now();
+                task_store.update(&task)?;
+
+                event_bus.emit(EventKind::TaskStateChange {
+                    task_id: task.id.clone(),
+                    repo: task.repo.clone(),
+                    from: old_label,
+                    to: "ci-failed".into(),
+                });
+            } else {
+                // Dispatch ci_fixer agent
+                tracing::info!(
+                    task_id = %task.id,
+                    attempt = current_attempt,
+                    max_retries,
+                    "dispatching ci_fixer agent"
+                );
+
+                dispatch_ci_fixer(
+                    task_store,
+                    event_bus,
+                    repo_path,
+                    agents_dir,
+                    roles,
+                    &mut task,
+                    pr_number,
+                    &pr_url,
+                    &branch,
+                    current_attempt,
+                    max_retries,
+                )
+                .await?;
+            }
+        }
+        CIStatus::Pending => {
+            // Should not happen — poll_ci_until_complete loops until non-pending
+            tracing::warn!(task_id = %task.id, "CI polling returned Pending unexpectedly");
+        }
+    }
+
+    Ok(())
+}
+
+/// Dispatch the ci_fixer agent to fix CI failures, then push and record the outcome.
+///
+/// Collects the failing CI logs for `pr_number`, builds a prompt from the
+/// `ci_fixer.md` template in `agents_dir` (falling back to the built-in
+/// default), and runs the agent in `repo_path`. `branch` is pushed afterwards
+/// even if the agent exited non-zero or timed out. A successful push moves the
+/// task back to `AwaitingCI`; a failed push moves it to `CIFailed`. Both store
+/// `current_attempt` as `ci_attempts`.
+///
+/// # Errors
+/// Propagates errors from `invoke_streaming` and from `task_store.update`.
+#[allow(clippy::too_many_arguments)]
+async fn dispatch_ci_fixer(
+    task_store: &TaskStore<'_>,
+    event_bus: &EventBus,
+    repo_path: &Path,
+    agents_dir: &Path,
+    roles: Option<&thrum_core::role::RolesConfig>,
+    task: &mut Task,
+    pr_number: u64,
+    pr_url: &str,
+    branch: &str,
+    current_attempt: u32,
+    max_retries: u32,
+) -> Result<()> {
+    // Log retrieval is best-effort: on error the message itself goes into the prompt.
+    let failure_logs = get_ci_failure_logs(repo_path, pr_number)
+        .unwrap_or_else(|e| format!("Failed to get CI logs: {e}"));
+
+    // Prefer the on-disk template; fall back to the built-in prompt, but say so.
+    let ci_fixer_prompt_file = agents_dir.join("ci_fixer.md");
+    let system_prompt = crate::claude_code::load_agent_prompt(&ci_fixer_prompt_file, None)
+        .await
+        .unwrap_or_else(|e| {
+            tracing::debug!(error = %e, "ci_fixer template not loaded, using default prompt");
+            default_ci_fixer_prompt()
+        });
+
+    // Build the full prompt with system context
+    let full_prompt = format!(
+        "{system_prompt}\n\n\
+         ## CI Fix Required\n\n\
+         **Task**: {} ({})\n\
+         **PR**: #{pr_number} ({pr_url})\n\
+         **Branch**: {branch}\n\
+         **Attempt**: {current_attempt}/{max_retries}\n\n\
+         ## CI Failure Logs\n\n{failure_logs}\n\n\
+         ## Instructions\n\n\
+         1. Read the CI failure logs above carefully\n\
+         2. Identify the root cause of the failure\n\
+         3. Fix the issue in the codebase\n\
+         4. Run the relevant tests locally to verify your fix\n\
+         5. Commit and push your changes\n\n\
+         The fix should be minimal and targeted — only change what's needed to make CI pass.\n\
+         Do NOT refactor or add features. Focus solely on fixing the CI failure.",
+        task.id, task.title
+    );
+
+    // Use the configured ci_fixer role, or built-in defaults when no roles are configured.
+    let role = roles
+        .map(|r| r.ci_fixer())
+        .unwrap_or_else(|| thrum_core::role::AgentRole {
+            backend: "opus".into(),
+            prompt_template: "agents/ci_fixer.md".into(),
+            budget_usd: Some(3.0),
+            timeout_secs: Some(600),
+            timeout_recovery: thrum_core::role::TimeoutRecoveryStrategy::Retry,
+        });
+
+    let agent_id = thrum_core::agent::AgentId(format!("ci_fixer-{}", task.id));
+
+    tracing::info!(task_id = %task.id, "invoking ci_fixer agent");
+
+    let agent_config = crate::claude_code::AgentConfig {
+        prompt: full_prompt,
+        cwd: repo_path.to_path_buf(),
+        max_budget_usd: role.budget_usd.unwrap_or(3.0),
+        model: "claude-opus-4-6".into(),
+        resume_session_id: None,
+        agent: None,
+        worktree: false,
+        permission_mode: "auto".into(),
+        timeout_secs: role.timeout_secs.unwrap_or(600),
+    };
+
+    let result =
+        crate::claude_code::invoke_streaming(&agent_config, event_bus, &agent_id, &task.id).await?;
+
+    // A failed or timed-out run is only logged: the branch is still pushed below.
+    if result.timed_out {
+        tracing::warn!(task_id = %task.id, "ci_fixer agent timed out");
+    } else if result.exit_code.is_some_and(|c| c != 0) {
+        tracing::warn!(
+            task_id = %task.id,
+            exit_code = ?result.exit_code,
+            "ci_fixer agent failed"
+        );
+    }
+
+    // Both outcomes end in a persisted status change, so pick the new status here
+    // and apply it once after the match.
+    let old_label = task.status.label().to_string();
+    let (new_status, new_label) = match push_branch(repo_path, branch) {
+        Ok(()) => {
+            tracing::info!(task_id = %task.id, branch, "ci_fixer pushed fix commit");
+
+            event_bus.emit(EventKind::CIFixPushed {
+                task_id: task.id.clone(),
+                repo: task.repo.clone(),
+                pr_number,
+                attempt: current_attempt,
+            });
+
+            // Return to AwaitingCI, recording the attempt number supplied by the caller.
+            let status = TaskStatus::AwaitingCI {
+                pr_number,
+                pr_url: pr_url.to_string(),
+                branch: branch.to_string(),
+                started_at: chrono::Utc::now(),
+                ci_attempts: current_attempt,
+            };
+            (status, "awaiting-ci")
+        }
+        Err(e) => {
+            tracing::error!(task_id = %task.id, error = %e, "failed to push ci_fixer changes");
+            // Escalate since we can't push
+            let status = TaskStatus::CIFailed {
+                pr_number,
+                pr_url: pr_url.to_string(),
+                failure_summary: format!("ci_fixer push failed: {e}"),
+                ci_attempts: current_attempt,
+            };
+            (status, "ci-failed")
+        }
+    };
+
+    task.status = new_status;
+    task.updated_at = chrono::Utc::now();
+    task_store.update(task)?;
+
+    event_bus.emit(EventKind::TaskStateChange {
+        task_id: task.id.clone(),
+        repo: task.repo.clone(),
+        from: old_label,
+        to: new_label.into(),
+    });
+
+    Ok(())
+}
+
+/// Built-in system prompt for the ci_fixer agent.
+/// Used as the fallback when the `ci_fixer.md` template cannot be loaded.
+fn default_ci_fixer_prompt() -> String {
+    String::from(concat!(
+        "You are a CI Fix Agent. Your sole job is to fix CI failures on a pull request branch.\n\n",
+        "## Process\n1. Read the CI failure logs provided in the prompt\n",
+        "2. Identify the root cause (build error, test failure, lint issue, etc.)\n",
+        "3. Make the minimum necessary fix\n",
+        "4. Run relevant checks locally to verify\n",
+        "5. Commit the fix with a clear message like \"fix: resolve CI failure in <component>\"\n\n",
+        "## Rules\n- Make MINIMAL changes — only fix the CI failure\n",
+        "- Do NOT refactor, add features, or restructure code\n",
+        "- Do NOT modify CI configuration unless the config itself is the bug\n",
+        "- Commit your fix before exiting\n",
+    ))
+}
+
+/// One check entry parsed from the JSON output of `gh pr checks --json`.
+#[derive(Debug, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct GhCheck {
+    name: String, // check name as reported by `gh`
+    state: String, // raw state string, e.g. "SUCCESS", "FAILURE", "PENDING"
+    #[serde(default, alias = "link")] // `gh pr checks` names the URL field `link`; missing -> None
+    details_url: Option<String>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use thrum_core::ci::CIPollResult;
+
+    #[test]
+    fn ci_status_display() {
+        assert_eq!(CIStatus::Pending.to_string(), "pending");
+        assert_eq!(CIStatus::Pass.to_string(), "pass");
+        assert_eq!(CIStatus::Fail.to_string(), "fail");
+        assert_eq!(CIStatus::NoChecks.to_string(), "no-checks");
+    }
+
+    #[test]
+    fn default_ci_fixer_prompt_not_empty() {
+        let prompt = default_ci_fixer_prompt();
+        assert!(!prompt.is_empty());
+        assert!(prompt.contains("CI Fix Agent"));
+    }
+
+    #[test]
+    fn default_ci_fixer_prompt_has_required_sections() {
+        let prompt = default_ci_fixer_prompt();
+        assert!(prompt.contains("Process"));
+        assert!(prompt.contains("MINIMAL") || prompt.contains("minimal"));
+        assert!(prompt.contains("ommit")); // matches both "Commit" and "commit"
+    }
+
+    #[test]
+    fn ci_config_defaults() {
+        let config = thrum_core::repo::CIConfig::default();
+        assert!(config.enabled);
+        assert_eq!(config.poll_interval_secs, 60);
+        assert_eq!(config.max_ci_retries, 3);
+        assert!(config.auto_merge);
+        assert_eq!(config.merge_strategy, "squash");
+    }
+
+    #[test]
+    fn ci_config_poll_interval_is_reasonable() {
+        let config = thrum_core::repo::CIConfig::default();
+        assert!(config.poll_interval_secs >= 10);
+        assert!(config.poll_interval_secs <= 600);
+    }
+
+    #[test]
+    fn ci_config_max_retries_is_bounded() {
+        let config = thrum_core::repo::CIConfig::default();
+        assert!(config.max_ci_retries >= 1);
+        assert!(config.max_ci_retries <= 10);
+    }
+
+    #[test]
+    fn task_status_awaiting_ci() {
+        let status = TaskStatus::AwaitingCI {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            branch: "auto/TASK-0001/repo/feature".into(),
+            started_at: chrono::Utc::now(),
+            ci_attempts: 0,
+        };
+        assert_eq!(status.label(), "awaiting-ci");
+        assert!(status.is_awaiting_ci());
+        assert!(!status.is_terminal());
+        assert!(!status.needs_human());
+    }
+
+    #[test]
+    fn task_status_awaiting_ci_is_not_claimable() {
+        let status = TaskStatus::AwaitingCI {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            branch: "auto/TASK-0001/repo/feature".into(),
+            started_at: chrono::Utc::now(),
+            ci_attempts: 0,
+        };
+        assert!(!status.is_claimable_pending());
+        assert!(!status.is_claimable_retry());
+        assert!(!status.is_claimable_approved());
+    }
+
+    #[test]
+    fn task_status_ci_failed() {
+        let status = TaskStatus::CIFailed {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            failure_summary: "test failure".into(),
+            ci_attempts: 3,
+        };
+        assert_eq!(status.label(), "ci-failed");
+        assert!(status.needs_human());
+        assert!(!status.is_terminal());
+    }
+
+    #[test]
+    fn task_status_ci_failed_is_not_claimable() {
+        let status = TaskStatus::CIFailed {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            failure_summary: "test failure".into(),
+            ci_attempts: 3,
+        };
+        assert!(!status.is_claimable_pending());
+        assert!(!status.is_claimable_retry());
+        assert!(!status.is_claimable_approved());
+    }
+
+    #[test]
+    fn ci_attempts_tracking_across_retries() {
+        // A fresh AwaitingCI (zero attempts) reports the awaiting-ci label
+        let status = TaskStatus::AwaitingCI {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            branch: "auto/TASK-0001/repo/feature".into(),
+            started_at: chrono::Utc::now(),
+            ci_attempts: 0,
+        };
+        assert_eq!(status.label(), "awaiting-ci");
+
+        // Each retry is modelled as a new AwaitingCI value carrying its attempt number
+        for attempt in 1..=3 {
+            let retry_status = TaskStatus::AwaitingCI {
+                pr_number: 42,
+                pr_url: "https://github.com/org/repo/pull/42".into(),
+                branch: "auto/TASK-0001/repo/feature".into(),
+                started_at: chrono::Utc::now(),
+                ci_attempts: attempt,
+            };
+            if let TaskStatus::AwaitingCI { ci_attempts, .. } = &retry_status {
+                assert_eq!(*ci_attempts, attempt);
+            }
+        }
+
+        let max_retries = thrum_core::repo::CIConfig::default().max_ci_retries;
+        let escalated = TaskStatus::CIFailed {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            failure_summary: "build failed after max retries".into(),
+            ci_attempts: max_retries + 1,
+        };
+        assert!(escalated.needs_human());
+        assert_eq!(escalated.label(), "ci-failed");
+    }
+
+    /// CI events emitted through the EventBus should be receivable by subscribers.
+    #[tokio::test]
+    async fn ci_events_emitted_to_event_bus() {
+        let bus = crate::event_bus::EventBus::new();
+        let mut rx = bus.subscribe();
+
+        bus.emit(EventKind::CIPollingStarted {
+            task_id: TaskId(42),
+            repo: RepoName::new("loom"),
+            pr_number: 99,
+            pr_url: "https://github.com/org/loom/pull/99".into(),
+        });
+
+        let event = rx.recv().await.unwrap();
+        match &event.kind {
+            EventKind::CIPollingStarted {
+                task_id, pr_number, ..
+            } => {
+                assert_eq!(*task_id, TaskId(42));
+                assert_eq!(*pr_number, 99);
+            }
+            other => panic!("expected CIPollingStarted, got {:?}", other),
+        }
+    }
+
+    /// Every CI event variant below round-trips through the bus and displays "CI" or "PR #".
+    #[tokio::test]
+    async fn all_ci_event_variants_round_trip_through_bus() {
+        let bus = crate::event_bus::EventBus::new();
+        let mut rx = bus.subscribe();
+
+        let events = vec![
+            EventKind::CIPollingStarted {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+                pr_url: "url".into(),
+            },
+            EventKind::CICheckUpdate {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+                status: "pending".into(),
+                summary: "checking...".into(),
+            },
+            EventKind::CIPassed {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+            },
+            EventKind::CIFailed {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+                attempt: 1,
+                max_attempts: 3,
+                failure_summary: "test failed".into(),
+            },
+            EventKind::CIFixPushed {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+                attempt: 1,
+            },
+            EventKind::CIEscalated {
+                task_id: TaskId(1),
+                repo: RepoName::new("r"),
+                pr_number: 1,
+                attempts: 3,
+                failure_summary: "persistent failure".into(),
+            },
+        ];
+
+        let expected_count = events.len();
+        for event in events {
+            bus.emit(event);
+        }
+
+        for _ in 0..expected_count {
+            let event = rx.recv().await.unwrap();
+            let display = event.to_string();
+            assert!(
+                display.contains("CI") || display.contains("PR #"),
+                "expected CI event in display, got: {display}"
+            );
+        }
+    }
+
+    #[test]
+    fn merge_strategy_flags() {
+        let strategies = [
+            ("squash", "--squash"),
+            ("rebase", "--rebase"),
+            ("merge", "--merge"),
+            ("unknown", "--squash"), // unrecognised strategies fall back to squash
+        ];
+        for (strategy, expected_flag) in strategies {
+            let flag = match strategy { // NOTE(review): local copy of the mapping; production code is not called
+                "squash" => "--squash",
+                "rebase" => "--rebase",
+                "merge" => "--merge",
+                _ => "--squash",
+            };
+            assert_eq!(flag, expected_flag, "strategy '{strategy}' mapped wrong");
+        }
+    }
+
+    #[test]
+    fn ci_poll_result_from_checks_aggregation() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "test".into(),
+                status: "success".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Pass);
+        assert_eq!(result.checks.len(), 2);
+        assert!(result.summary.contains("2 passed"));
+    }
+
+    #[test]
+    fn ci_poll_result_mixed_statuses() {
+        let checks = vec![
+            CICheck {
+                name: "build".into(),
+                status: "success".into(),
+                url: None,
+            },
+            CICheck {
+                name: "test".into(),
+                status: "failure".into(),
+                url: Some("https://ci.example.com/run/789".into()),
+            },
+            CICheck {
+                name: "lint".into(),
+                status: "success".into(),
+                url: None,
+            },
+        ];
+        let result = CIPollResult::from_checks(checks);
+        assert_eq!(result.status, CIStatus::Fail);
+        assert!(result.summary.contains("1 failed"));
+        assert!(result.summary.contains("2 passed"));
+    }
+
+    #[test]
+    fn awaiting_ci_serialization_preserves_fields() {
+        let now = chrono::Utc::now();
+        let status = TaskStatus::AwaitingCI {
+            pr_number: 42,
+            pr_url: "https://github.com/org/repo/pull/42".into(),
+            branch: "auto/TASK-0001/repo/feature".into(),
+            started_at: now,
+            ci_attempts: 2,
+        };
+
+        let json = serde_json::to_string(&status).unwrap();
+        let parsed: TaskStatus = serde_json::from_str(&json).unwrap();
+
+        match parsed {
+            TaskStatus::AwaitingCI {
+                pr_number,
+                pr_url,
+                branch,
+                ci_attempts,
+                ..
+            } => {
+                assert_eq!(pr_number, 42);
+                assert_eq!(pr_url, "https://github.com/org/repo/pull/42");
+                assert_eq!(branch, "auto/TASK-0001/repo/feature");
+                assert_eq!(ci_attempts, 2);
+            }
+            other => panic!("expected AwaitingCI, got {}", other.label()),
+        }
+    }
+
+    #[test]
+    fn ci_failed_serialization_preserves_fields() {
+        let status = TaskStatus::CIFailed {
+            pr_number: 99,
+            pr_url: "https://github.com/org/repo/pull/99".into(),
+            failure_summary: "cargo test failed: 3 tests failed".into(),
+            ci_attempts: 4,
+        };
+
+        let json = serde_json::to_string(&status).unwrap();
+        let parsed: TaskStatus = serde_json::from_str(&json).unwrap();
+
+        match parsed {
+            TaskStatus::CIFailed {
+                pr_number,
+                failure_summary,
+                ci_attempts,
+                ..
+            } => {
+                assert_eq!(pr_number, 99);
+                assert!(failure_summary.contains("3 tests failed"));
+                assert_eq!(ci_attempts, 4);
+            }
+            other => panic!("expected CIFailed, got {}", other.label()),
+        }
+    }
+
+    #[test]
+    fn gh_check_deserialization() {
+        let json = r#"{"name":"CI","state":"SUCCESS","detailsUrl":"https://example.com/run/1"}"#;
+        let check: GhCheck = serde_json::from_str(json).unwrap();
+        assert_eq!(check.name, "CI");
+        assert_eq!(check.state, "SUCCESS");
+        assert_eq!(
+            check.details_url.as_deref(),
+            Some("https://example.com/run/1")
+        );
+    }
+
+    #[test]
+    fn gh_check_deserialization_no_url() {
+        let json = r#"{"name":"lint","state":"FAILURE"}"#;
+        let check: GhCheck = serde_json::from_str(json).unwrap();
+        assert_eq!(check.name, "lint");
+        assert_eq!(check.state, "FAILURE");
+        assert!(check.details_url.is_none());
+    }
+
+    #[test]
+    fn gh_checks_array_deserialization() {
+        let json = r#"[
+            {"name":"build","state":"SUCCESS","detailsUrl":"https://example.com/1"},
+            {"name":"test","state":"FAILURE","detailsUrl":"https://example.com/2"},
+            {"name":"lint","state":"PENDING"}
+        ]"#;
+        let checks: Vec<GhCheck> = serde_json::from_str(json).unwrap();
+        assert_eq!(checks.len(), 3);
+        assert_eq!(checks[0].state, "SUCCESS");
+        assert_eq!(checks[1].state, "FAILURE");
+        assert_eq!(checks[2].state, "PENDING");
+    }
+
+    /// Meant to show CI work does not block the engine. NOTE(review): no engine code runs here.
+    #[tokio::test]
+    async fn ci_dispatch_does_not_block_engine() {
+        use std::sync::Arc;
+        use std::sync::atomic::{AtomicBool, Ordering};
+
+        let ci_running = Arc::new(AtomicBool::new(false));
+        let ci_running_clone = ci_running.clone();
+
+        let mut join_set = tokio::task::JoinSet::new();
+
+        join_set.spawn(async move {
+            ci_running_clone.store(true, Ordering::SeqCst);
+            tokio::time::sleep(Duration::from_millis(50)).await;
+            ci_running_clone.store(false, Ordering::SeqCst);
+            42u32
+        });
+
+        let engine_work_completed = true;
+        assert!(engine_work_completed, "engine should not be blocked by CI");
+
+        let result = join_set.join_next().await.unwrap().unwrap();
+        assert_eq!(result, 42);
+    }
+}
diff --git a/crates/thrum-runner/src/claude.rs b/crates/thrum-runner/src/claude.rs
deleted file mode 100644
index ddd01d0..0000000
--- a/crates/thrum-runner/src/claude.rs
+++ /dev/null
@@ -1,249 +0,0 @@
-//! Claude Code CLI backend — the primary agent for implementation tasks.
-//!
-//! Spawns `claude -p "prompt" --output-format json` as a subprocess.
-//! This backend has full agent capabilities: file editing, terminal, git.
-//!
-//! Supports session continuation: when a previous session ID is provided
-//! via `AiRequest::resume_session_id`, uses `--resume {id}` to continue
-//! the existing session, preserving agent context across retries.
-
-use crate::backend::{AiBackend, AiRequest, AiResponse, BackendCapability};
-use crate::subprocess::{SubprocessOutput, run_cmd};
-use anyhow::{Context, Result};
-use async_trait::async_trait;
-use std::path::{Path, PathBuf};
-use std::time::Duration;
-
-/// Default timeout for a Claude session (20 minutes).
-const CLAUDE_TIMEOUT: Duration = Duration::from_secs(1200);
-
-/// Claude Code CLI backend.
-pub struct ClaudeCliBackend {
-    /// Default working directory.
-    pub default_cwd: PathBuf,
-    /// Session timeout.
-    pub timeout: Duration,
-    /// Whether to use --dangerously-skip-permissions.
-    pub skip_permissions: bool,
-}
-
-impl ClaudeCliBackend {
-    pub fn new(default_cwd: PathBuf) -> Self {
-        Self {
-            default_cwd,
-            timeout: CLAUDE_TIMEOUT,
-            skip_permissions: false,
-        }
-    }
-}
-
-#[async_trait]
-impl AiBackend for ClaudeCliBackend {
-    fn name(&self) -> &str {
-        "claude-code"
-    }
-
-    fn capability(&self) -> BackendCapability {
-        BackendCapability::Agent
-    }
-
-    fn model(&self) -> &str {
-        "claude-opus-4-6"
-    }
-
-    async fn invoke(&self, request: &AiRequest) -> Result<AiResponse> {
-        let cwd = request.cwd.as_deref().unwrap_or(&self.default_cwd);
-
-        let mut cmd_parts = vec!["claude".to_string()];
-
-        // Session continuation: resume an existing session to preserve context
-        if let Some(ref session_id) = request.resume_session_id {
-            cmd_parts.push("--resume".into());
-            cmd_parts.push(session_id.clone());
-            tracing::info!(session_id, "resuming Claude session");
-        }
-
-        cmd_parts.push("-p".into());
-
-        let escaped = request.prompt.replace('\'', "'\\''");
-        cmd_parts.push(format!("'{escaped}'"));
-        cmd_parts.push("--output-format".into());
-        cmd_parts.push("json".into());
-
-        if self.skip_permissions {
-            cmd_parts.push("--dangerously-skip-permissions".into());
-        }
-
-        // Write system prompt to temp file if provided
-        if let Some(ref sys) = request.system_prompt {
-            let tmp =
-                std::env::temp_dir().join(format!("thrum-sysprompt-{}.md", std::process::id()));
-            tokio::fs::write(&tmp, sys).await?;
-            cmd_parts.push("--system-prompt".into());
-            cmd_parts.push(format!("'{}'", tmp.display()));
-        }
-
-        let cmd = cmd_parts.join(" ");
-        tracing::info!(prompt_len = request.prompt.len(), cwd = %cwd.display(), "invoking claude CLI");
-
-        let output = run_cmd(&cmd, cwd, self.timeout).await?;
-        let (content, session_id) = parse_claude_output(&output);
-
-        Ok(AiResponse {
-            content,
-            model: "claude-opus-4-6".into(),
-            input_tokens: None,
-            output_tokens: None,
-            timed_out: output.timed_out,
-            exit_code: Some(output.exit_code),
-            session_id,
-        })
-    }
-
-    async fn health_check(&self) -> Result<()> {
-        let output = run_cmd(
-            "claude --version",
-            &self.default_cwd,
-            Duration::from_secs(5),
-        )
-        .await?;
-        if output.success() {
-            Ok(())
-        } else {
-            anyhow::bail!("claude CLI not available: {}", output.stderr)
-        }
-    }
-}
-
-/// Parse Claude CLI JSON output, extracting both the result text and session ID.
-///
-/// Claude Code's `--output-format json` returns a JSON object with:
-/// - `result`: the text output from the agent
-/// - `session_id`: a unique identifier for the session (used for `--resume`)
-fn parse_claude_output(output: &SubprocessOutput) -> (String, Option<String>) {
-    if output.timed_out {
-        // On timeout, still try to extract session_id from any partial output
-        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&output.stdout) {
-            let session_id = json
-                .get("session_id")
-                .and_then(|v| v.as_str())
-                .map(String::from);
-            return (String::new(), session_id);
-        }
-        return (String::new(), None);
-    }
-
-    // Try JSON parse, fall back to raw stdout
-    if let Ok(json) = serde_json::from_str::<serde_json::Value>(&output.stdout) {
-        let content = json
-            .get("result")
-            .and_then(|v| v.as_str())
-            .unwrap_or(&output.stdout)
-            .to_string();
-        let session_id = json
-            .get("session_id")
-            .and_then(|v| v.as_str())
-            .map(String::from);
-        (content, session_id)
-    } else {
-        (output.stdout.clone(), None)
-    }
-}
-
-/// Load an agent system prompt from a markdown file, optionally embedding
-/// a CLAUDE.md from the target repo.
-pub async fn load_agent_prompt(agent_file: &Path, claude_md: Option<&Path>) -> Result<String> {
-    let mut prompt = tokio::fs::read_to_string(agent_file)
-        .await
-        .context(format!(
-            "failed to read agent file: {}",
-            agent_file.display()
-        ))?;
-
-    if let Some(claude_md_path) = claude_md {
-        let repo_claude = tokio::fs::read_to_string(claude_md_path)
-            .await
-            .context(format!(
-                "failed to read CLAUDE.md: {}",
-                claude_md_path.display()
-            ))?;
-        prompt = prompt.replace("{{CLAUDE_MD}}", &repo_claude);
-    }
-
-    Ok(prompt)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn parse_json_with_session_id() {
-        let output = SubprocessOutput {
-            stdout: r#"{"result": "done", "session_id": "ses-abc123"}"#.into(),
-            stderr: String::new(),
-            exit_code: 0,
-            timed_out: false,
-        };
-        let (content, session_id) = parse_claude_output(&output);
-        assert_eq!(content, "done");
-        assert_eq!(session_id.as_deref(), Some("ses-abc123"));
-    }
-
-    #[test]
-    fn parse_json_without_session_id() {
-        let output = SubprocessOutput {
-            stdout: r#"{"result": "done"}"#.into(),
-            stderr: String::new(),
-            exit_code: 0,
-            timed_out: false,
-        };
-        let (content, session_id) = parse_claude_output(&output);
-        assert_eq!(content, "done");
-        assert!(session_id.is_none());
-    }
-
-    #[test]
-    fn parse_timeout_extracts_session_id() {
-        let output = SubprocessOutput {
-            stdout: r#"{"result": "partial", "session_id": "ses-timeout"}"#.into(),
-            stderr: "timed out".into(),
-            exit_code: -1,
-            timed_out: true,
-        };
-        let (content, session_id) = parse_claude_output(&output);
-        assert!(content.is_empty());
-        assert_eq!(session_id.as_deref(), Some("ses-timeout"));
-    }
-
-    #[test]
-    fn parse_timeout_no_output() {
-        let output = SubprocessOutput {
-            stdout: String::new(),
-            stderr: "timed out".into(),
-            exit_code: -1,
-            timed_out: true,
-        };
-        let (content, session_id) = parse_claude_output(&output);
-        assert!(content.is_empty());
-        assert!(session_id.is_none());
-    }
-
-    #[test]
-    fn parse_non_json_output() {
-        let output = SubprocessOutput {
-            stdout: "raw text output".into(),
-            stderr: String::new(),
-            exit_code: 0,
-            timed_out: false,
-        };
-        let (content, session_id) = parse_claude_output(&output);
-        assert_eq!(content, "raw text output");
-        assert!(session_id.is_none());
-    }
-
-    #[test]
-    fn default_timeout_is_1200s() {
-        assert_eq!(CLAUDE_TIMEOUT, Duration::from_secs(1200));
-    }
-}
diff --git a/crates/thrum-runner/src/claude_code.rs b/crates/thrum-runner/src/claude_code.rs
new file mode 100644
index 0000000..004b214
--- /dev/null
+++ b/crates/thrum-runner/src/claude_code.rs
@@ -0,0 +1,545 @@
+//! Claude Code integration — spawn agents via `claude -p` with stream-json.
+//!
+//! Replaces: backend.rs, claude.rs, anthropic.rs, openai_compat.rs,
+//! cli_agent.rs, subprocess.rs (for agent invocation), sandbox.rs,
+//! shutdown.rs, worktree.rs.
+//!
+//! One function: `spawn_agent()` — invokes Claude Code, parses NDJSON,
+//! streams typed events, returns the final result.
+
+use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::sync::mpsc;
+use tokio::task::JoinHandle;
+
+/// Result from an AI backend invocation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AiResponse {
+    /// The text output from the AI.
+    pub content: String,
+    /// Model used (e.g., "claude-opus-4-6", "devstral-small-2505").
+    pub model: String,
+    /// Input tokens consumed.
+    pub input_tokens: Option<u64>,
+    /// Output tokens produced.
+    pub output_tokens: Option<u64>,
+    /// Whether the invocation timed out.
+    pub timed_out: bool,
+    /// Exit code (for CLI-based backends).
+    pub exit_code: Option<i32>,
+    /// Session ID from the backend, used for session continuation on retries.
+    #[serde(default)]
+    pub session_id: Option<String>,
+}
+
+/// Configuration for spawning a Claude Code agent.
+#[derive(Debug, Clone)]
+pub struct AgentConfig {
+    /// The prompt to send.
+    pub prompt: String,
+    /// Working directory (repo root).
+    pub cwd: PathBuf,
+    /// Maximum budget in USD. Claude Code enforces this internally.
+    pub max_budget_usd: f64,
+    /// Model to use.
+    pub model: String,
+    /// Session ID to resume (for retries with context).
+    pub resume_session_id: Option<String>,
+    /// Agent definition name (maps to `.claude/agents/{name}.md`).
+    pub agent: Option<String>,
+    /// Whether to create an isolated worktree.
+    pub worktree: bool,
+    /// Permission mode: "auto", "default", etc.
+    pub permission_mode: String,
+    /// Timeout in seconds (enforced by us, not Claude).
+    pub timeout_secs: u64,
+}
+
+impl AgentConfig {
+    pub fn new(prompt: impl Into<String>, cwd: impl Into<PathBuf>) -> Self {
+        Self {
+            prompt: prompt.into(),
+            cwd: cwd.into(),
+            max_budget_usd: 5.0,
+            model: "claude-opus-4-6".into(),
+            resume_session_id: None,
+            agent: None,
+            worktree: true,
+            permission_mode: "auto".into(),
+            timeout_secs: 1200,
+        }
+    }
+}
+
+/// A typed event parsed from Claude Code's stream-json output.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum AgentEvent {
+    /// Session initialized — carries the session ID reported by Claude Code.
+    SessionStarted { session_id: String },
+    /// Agent called a tool (Edit, Bash, Read, etc.)
+    ToolCall {
+        tool: String,
+        input: serde_json::Value,
+    },
+    /// Tool execution result; `error` mirrors the block's `is_error` flag.
+    ToolResult {
+        tool: String,
+        #[serde(default)]
+        error: bool,
+    },
+    /// Agent text output (thinking/commentary).
+    Text { text: String },
+    /// Agent finished (also sent on timeout) — carries the final result.
+    Completed { result: AgentResult },
+}
+
+/// Final result from a Claude Code agent invocation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AgentResult {
+    /// The final text output.
+    pub output: String,
+    /// Total cost in USD (from Claude Code's accounting).
+    pub cost_usd: f64,
+    /// Session ID for potential resume.
+    pub session_id: Option<String>,
+    /// Number of turns (tool call rounds).
+    pub num_turns: u32,
+    /// Duration in milliseconds.
+    pub duration_ms: u64,
+    /// Whether the agent timed out.
+    pub timed_out: bool,
+}
+
+/// Handle to a running agent — await the result or receive streaming events.
+pub struct AgentHandle {
+    pub join: JoinHandle<Result<AgentResult>>,
+    pub events: mpsc::Receiver<AgentEvent>,
+}
+
+/// Spawn a Claude Code agent and stream its output as typed events.
+///
+/// This single function replaces the entire backend/subprocess/claude/sandbox
+/// stack. Claude Code manages its own worktree, sandbox, and session state.
+pub async fn spawn_agent(config: &AgentConfig) -> Result<AgentHandle> {
+    let mut cmd = Command::new("claude");
+
+    // Core: print mode with stream-json output
+    cmd.arg("-p").arg(&config.prompt);
+    cmd.arg("--output-format").arg("stream-json");
+
+    // Model selection
+    cmd.arg("--model").arg(&config.model);
+
+    // Budget enforcement (Claude Code tracks this internally)
+    cmd.arg("--max-budget-usd")
+        .arg(config.max_budget_usd.to_string());
+
+    // Permission mode
+    cmd.arg("--permission-mode").arg(&config.permission_mode);
+
+    // Worktree isolation (Claude manages lifecycle)
+    if config.worktree {
+        cmd.arg("--worktree");
+    }
+
+    // Resume previous session
+    if let Some(ref session_id) = config.resume_session_id {
+        cmd.arg("--resume").arg(session_id);
+    }
+
+    // Agent definition
+    if let Some(ref agent) = config.agent {
+        cmd.arg("--agent").arg(agent);
+    }
+
+    // Environment: prevent nesting detection
+    cmd.current_dir(&config.cwd)
+        .env_remove("CLAUDECODE")
+        .env_remove("CLAUDE_CODE_ENTRYPOINT")
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped());
+
+    tracing::info!(
+        cwd = %config.cwd.display(),
+        model = %config.model,
+        budget = config.max_budget_usd,
+        worktree = config.worktree,
+        resume = ?config.resume_session_id,
+        agent = ?config.agent,
+        "spawning claude code agent"
+    );
+
+    let mut child = cmd.spawn().context("failed to spawn claude CLI")?;
+    let stdout = child.stdout.take().context("no stdout from claude")?;
+    let stderr = child.stderr.take().context("no stderr from claude")?;
+
+    let (tx, rx) = mpsc::channel(256);
+    let timeout = std::time::Duration::from_secs(config.timeout_secs);
+
+    let handle = tokio::spawn(async move {
+        let mut reader = BufReader::new(stdout).lines();
+        let mut stderr_reader = BufReader::new(stderr).lines();
+        let mut session_id: Option<String> = None;
+        let mut last_result: Option<AgentResult> = None;
+
+        // Drain stderr in background (just log it)
+        let stderr_handle = tokio::spawn(async move {
+            while let Ok(Some(line)) = stderr_reader.next_line().await {
+                tracing::debug!(stderr = %line, "claude stderr");
+            }
+        });
+
+        let stream_future = async {
+            while let Ok(Some(line)) = reader.next_line().await {
+                if line.trim().is_empty() {
+                    continue;
+                }
+
+                let json: serde_json::Value = match serde_json::from_str(&line) {
+                    Ok(v) => v,
+                    Err(e) => {
+                        tracing::trace!(line = %line, error = %e, "non-json line from claude");
+                        continue;
+                    }
+                };
+
+                let event_type = json.get("type").and_then(|v| v.as_str()).unwrap_or("");
+
+                match event_type {
+                    "system" => {
+                        if let Some(sid) = json.get("session_id").and_then(|v| v.as_str()) {
+                            session_id = Some(sid.to_string());
+                            let _ = tx
+                                .send(AgentEvent::SessionStarted {
+                                    session_id: sid.to_string(),
+                                })
+                                .await;
+                        }
+                    }
+                    "assistant" => {
+                        // Extract tool_use and text content blocks
+                        if let Some(content) =
+                            json.pointer("/message/content").and_then(|v| v.as_array())
+                        {
+                            for block in content {
+                                let block_type =
+                                    block.get("type").and_then(|v| v.as_str()).unwrap_or("");
+                                match block_type {
+                                    "tool_use" => {
+                                        let tool = block
+                                            .get("name")
+                                            .and_then(|v| v.as_str())
+                                            .unwrap_or("unknown")
+                                            .to_string();
+                                        let input = block
+                                            .get("input")
+                                            .cloned()
+                                            .unwrap_or(serde_json::Value::Null);
+                                        let _ = tx.send(AgentEvent::ToolCall { tool, input }).await;
+                                    }
+                                    "tool_result" => {
+                                        let tool = block
+                                            .get("name")
+                                            .and_then(|v| v.as_str())
+                                            .unwrap_or("unknown")
+                                            .to_string();
+                                        let error = block
+                                            .get("is_error")
+                                            .and_then(|v| v.as_bool())
+                                            .unwrap_or(false);
+                                        let _ =
+                                            tx.send(AgentEvent::ToolResult { tool, error }).await;
+                                    }
+                                    "text" => {
+                                        if let Some(text) =
+                                            block.get("text").and_then(|v| v.as_str())
+                                            && !text.is_empty()
+                                        {
+                                            let _ = tx
+                                                .send(AgentEvent::Text {
+                                                    text: text.to_string(),
+                                                })
+                                                .await;
+                                        }
+                                    }
+                                    _ => {}
+                                }
+                            }
+                        }
+                    }
+                    "result" => {
+                        let cost = json
+                            .get("total_cost_usd")
+                            .and_then(|v| v.as_f64())
+                            .unwrap_or(0.0);
+                        let num_turns =
+                            json.get("num_turns").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
+                        let duration_ms = json
+                            .get("duration_ms")
+                            .and_then(|v| v.as_u64())
+                            .unwrap_or(0);
+                        let result_text = json
+                            .get("result")
+                            .and_then(|v| v.as_str())
+                            .unwrap_or("")
+                            .to_string();
+                        let result_session = json
+                            .get("session_id")
+                            .and_then(|v| v.as_str())
+                            .map(String::from)
+                            .or_else(|| session_id.clone());
+
+                        last_result = Some(AgentResult {
+                            output: result_text,
+                            cost_usd: cost,
+                            session_id: result_session,
+                            num_turns,
+                            duration_ms,
+                            timed_out: false,
+                        });
+                    }
+                    _ => {
+                        // Ignore unknown event types (partial messages, etc.)
+                    }
+                }
+            }
+        };
+
+        // Apply timeout
+        let timed_out = tokio::time::timeout(timeout, stream_future).await.is_err();
+
+        if timed_out {
+            tracing::warn!("claude agent timed out, killing process");
+            let _ = child.kill().await;
+        }
+
+        // Wait for process exit
+        let _ = child.wait().await;
+        let _ = stderr_handle.await;
+
+        if timed_out {
+            let result = AgentResult {
+                output: String::new(),
+                cost_usd: 0.0,
+                session_id,
+                num_turns: 0,
+                duration_ms: 0,
+                timed_out: true,
+            };
+            let _ = tx
+                .send(AgentEvent::Completed {
+                    result: result.clone(),
+                })
+                .await;
+            return Ok(result);
+        }
+
+        let result = last_result.unwrap_or(AgentResult {
+            output: String::new(),
+            cost_usd: 0.0,
+            session_id,
+            num_turns: 0,
+            duration_ms: 0,
+            timed_out: false,
+        });
+
+        let _ = tx
+            .send(AgentEvent::Completed {
+                result: result.clone(),
+            })
+            .await;
+        Ok(result)
+    });
+
+    Ok(AgentHandle {
+        join: handle,
+        events: rx,
+    })
+}
+
+/// Load an agent prompt from a markdown file, optionally embedding
+/// a CLAUDE.md from the target repo.
+///
+/// When `claude_md` is given, every `{{CLAUDE_MD}}` placeholder in the agent
+/// file is replaced with that file's contents; otherwise the agent file is
+/// returned as read. Fails if either file cannot be read.
+pub async fn load_agent_prompt(agent_file: &Path, claude_md: Option<&Path>) -> Result<String> {
+    // `with_context` builds the error message lazily, only on failure.
+    let mut prompt = tokio::fs::read_to_string(agent_file)
+        .await
+        .with_context(|| format!("failed to read agent file: {}", agent_file.display()))?;
+
+    if let Some(claude_md_path) = claude_md {
+        let repo_claude = tokio::fs::read_to_string(claude_md_path)
+            .await
+            .with_context(|| format!("failed to read CLAUDE.md: {}", claude_md_path.display()))?;
+        // `str::replace` substitutes every occurrence of the placeholder.
+        prompt = prompt.replace("{{CLAUDE_MD}}", &repo_claude);
+    }
+
+    Ok(prompt)
+}
+
+/// Run a Claude Code agent via `spawn_agent()` and adapt the outcome to the
+/// legacy `AiResponse`, forwarding tool calls, text and tool errors to the
+/// event bus as `AgentOutput` lines while the agent runs.
+///
+/// `model` echoes `config.model`; `exit_code` is synthesized (-1 on timeout, else 0).
+pub async fn invoke_streaming(
+    config: &AgentConfig,
+    event_bus: &crate::event_bus::EventBus,
+    agent_id: &thrum_core::agent::AgentId,
+    task_id: &thrum_core::task::TaskId,
+) -> Result<AiResponse> {
+    let handle = spawn_agent(config).await?;
+    let mut events = handle.events;
+    let join = handle.join;
+
+    // Drain the event channel on a separate task so the agent never blocks on a full channel
+    let bus = event_bus.clone();
+    let aid = agent_id.clone();
+    let tid = task_id.clone();
+    let forwarder = tokio::spawn(async move {
+        while let Some(event) = events.recv().await {
+            match &event {
+                AgentEvent::ToolCall { tool, input } => {
+                    let summary = if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
+                        format!("{tool}: {cmd}")
+                    } else if let Some(fp) = input.get("file_path").and_then(|v| v.as_str()) {
+                        format!("{tool}: {fp}")
+                    } else {
+                        tool.clone()
+                    };
+                    bus.emit(thrum_core::event::EventKind::AgentOutput {
+                        agent_id: aid.clone(),
+                        task_id: tid.clone(),
+                        stream: thrum_core::event::OutputStream::Stdout,
+                        line: format!("[tool] {summary}"),
+                    });
+                }
+                AgentEvent::Text { text } => {
+                    bus.emit(thrum_core::event::EventKind::AgentOutput {
+                        agent_id: aid.clone(),
+                        task_id: tid.clone(),
+                        stream: thrum_core::event::OutputStream::Stdout,
+                        line: text.clone(),
+                    });
+                }
+                AgentEvent::ToolResult { tool, error } => {
+                    if *error {
+                        bus.emit(thrum_core::event::EventKind::AgentOutput {
+                            agent_id: aid.clone(),
+                            task_id: tid.clone(),
+                            stream: thrum_core::event::OutputStream::Stderr,
+                            line: format!("[tool error] {tool}"),
+                        });
+                    }
+                }
+                _ => {} // SessionStarted / Completed are not forwarded
+            }
+        }
+    });
+
+    let result = join.await.context("agent task panicked")??;
+    let _ = forwarder.await;
+
+    Ok(AiResponse {
+        content: result.output,
+        model: config.model.clone(),
+        input_tokens: None,
+        output_tokens: None,
+        timed_out: result.timed_out,
+        exit_code: if result.timed_out { Some(-1) } else { Some(0) },
+        session_id: result.session_id,
+    })
+}
+
+/// Quick health check — verify the `claude` CLI is available.
+pub async fn health_check() -> Result<()> {
+    let output = Command::new("claude")
+        .arg("--version")
+        .output()
+        .await
+        .context("claude CLI not found")?;
+
+    if output.status.success() {
+        let version = String::from_utf8_lossy(&output.stdout);
+        tracing::info!(version = %version.trim(), "claude CLI available");
+        Ok(())
+    } else {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("claude CLI not available: {stderr}")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn agent_config_defaults() {
+        let config = AgentConfig::new("test prompt", "/tmp/repo");
+        assert_eq!(config.model, "claude-opus-4-6");
+        assert_eq!(config.max_budget_usd, 5.0);
+        assert!(config.worktree);
+        assert_eq!(config.permission_mode, "auto");
+        assert_eq!(config.timeout_secs, 1200);
+        assert!(config.resume_session_id.is_none());
+        assert!(config.agent.is_none());
+    }
+
+    #[test]
+    fn agent_event_serialization() {
+        let event = AgentEvent::ToolCall {
+            tool: "Edit".into(),
+            input: serde_json::json!({"file_path": "/src/main.rs"}),
+        };
+        let json = serde_json::to_string(&event).unwrap();
+        assert!(json.contains("\"type\":\"tool_call\""));
+        assert!(json.contains("\"tool\":\"Edit\""));
+    }
+
+    #[test]
+    fn agent_result_defaults() {
+        let result = AgentResult {
+            output: "done".into(),
+            cost_usd: 0.42,
+            session_id: Some("ses-123".into()),
+            num_turns: 5,
+            duration_ms: 30000,
+            timed_out: false,
+        };
+        assert_eq!(result.cost_usd, 0.42);
+        assert!(!result.timed_out);
+    }
+
+    #[test]
+    fn parse_stream_json_system_event() {
+        let line = r#"{"type":"system","subtype":"init","session_id":"ses-abc","tools":["Bash","Read","Edit"]}"#;
+        let json: serde_json::Value = serde_json::from_str(line).unwrap();
+        assert_eq!(json["type"], "system");
+        assert_eq!(json["session_id"], "ses-abc");
+    }
+
+    #[test]
+    fn parse_stream_json_result_event() {
+        let line = r#"{"type":"result","subtype":"success","total_cost_usd":0.0653,"num_turns":3,"duration_ms":45000,"result":"Fixed the bug.","session_id":"ses-abc"}"#;
+        let json: serde_json::Value = serde_json::from_str(line).unwrap();
+        assert_eq!(json["type"], "result");
+        assert_eq!(json["total_cost_usd"], 0.0653);
+        assert_eq!(json["num_turns"], 3);
+    }
+
+    #[test]
+    fn parse_stream_json_assistant_tool_use() {
+        let line = r#"{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Bash","input":{"command":"cargo test"}}]}}"#;
+        let json: serde_json::Value = serde_json::from_str(line).unwrap();
+        let content = json["message"]["content"].as_array().unwrap();
+        assert_eq!(content[0]["type"], "tool_use");
+        assert_eq!(content[0]["name"], "Bash");
+    }
+}
diff --git a/crates/thrum-runner/src/cli_agent.rs b/crates/thrum-runner/src/cli_agent.rs
deleted file mode 100644
index 1bd95ff..0000000
--- a/crates/thrum-runner/src/cli_agent.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-//! Generic CLI agent backend for tools like Vibe, OpenCode, Aider, etc.
-//!
-//! These are agent-capable tools that run as CLI processes,
-//! similar to Claude Code but with different interfaces.
-//!
-//! Supports session continuation: when `AiRequest::resume_session_id` is set,
-//! appends the session flag (e.g., `-s {id}` for OpenCode) to resume context.
-
-use crate::backend::{AiBackend, AiRequest, AiResponse, BackendCapability};
-use crate::subprocess::run_cmd;
-use anyhow::Result;
-use async_trait::async_trait;
-use std::path::PathBuf;
-use std::time::Duration;
-
-/// A generic CLI-based AI agent.
-pub struct CliAgentBackend {
-    /// Display name (e.g., "vibe", "opencode", "aider").
-    pub name: String,
-    /// The CLI command to invoke (e.g., "vibe", "opencode").
-    pub command: String,
-    /// How to pass the prompt (e.g., ["-m", "{prompt}"] or ["{prompt}"]).
-    /// Use `{prompt}` as placeholder for the actual prompt text.
-    pub prompt_args: Vec<String>,
-    /// Model name this tool uses.
-    pub model_name: String,
-    /// Default working directory.
-    pub default_cwd: PathBuf,
-    /// Session timeout.
-    pub timeout: Duration,
-    /// Flag for session continuation (e.g., "-s" for OpenCode).
-    /// When set, `--resume_session_id` causes `{session_flag} {id}` to be appended.
-    pub session_flag: Option<String>,
-}
-
-impl CliAgentBackend {
-    /// Create a Vibe backend.
-    pub fn vibe(default_cwd: PathBuf) -> Self {
-        Self {
-            name: "vibe".into(),
-            command: "vibe".into(),
-            prompt_args: vec!["-m".into(), "{prompt}".into()],
-            model_name: "devstral-small-2505".into(),
-            default_cwd,
-            timeout: Duration::from_secs(1200),
-            session_flag: None,
-        }
-    }
-
-    /// Create an OpenCode backend.
-    pub fn opencode(default_cwd: PathBuf) -> Self {
-        Self {
-            name: "opencode".into(),
-            command: "opencode".into(),
-            prompt_args: vec!["-m".into(), "{prompt}".into()],
-            model_name: "devstral-small-2505".into(),
-            default_cwd,
-            timeout: Duration::from_secs(1200),
-            session_flag: Some("-s".into()),
-        }
-    }
-}
-
-#[async_trait]
-impl AiBackend for CliAgentBackend {
-    fn name(&self) -> &str {
-        &self.name
-    }
-
-    fn capability(&self) -> BackendCapability {
-        BackendCapability::Agent
-    }
-
-    fn model(&self) -> &str {
-        &self.model_name
-    }
-
-    async fn invoke(&self, request: &AiRequest) -> Result<AiResponse> {
-        let cwd = request.cwd.as_deref().unwrap_or(&self.default_cwd);
-
-        // Build command with prompt substitution
-        let escaped = request.prompt.replace('\'', "'\\''");
-        let args: Vec<String> = self
-            .prompt_args
-            .iter()
-            .map(|a| a.replace("{prompt}", &format!("'{escaped}'")))
-            .collect();
-
-        let mut cmd = format!("{} {}", self.command, args.join(" "));
-
-        // Session continuation: append session flag if backend supports it
-        if let (Some(flag), Some(session_id)) = (&self.session_flag, &request.resume_session_id) {
-            cmd.push_str(&format!(" {flag} {session_id}"));
-            tracing::info!(
-                agent = %self.name,
-                session_id = session_id.as_str(),
-                "resuming CLI agent session"
-            );
-        }
-
-        tracing::info!(
-            agent = %self.name,
-            prompt_len = request.prompt.len(),
-            cwd = %cwd.display(),
-            "invoking CLI agent"
-        );
-
-        let output = run_cmd(&cmd, cwd, self.timeout).await?;
-
-        Ok(AiResponse {
-            content: output.stdout,
-            model: self.model_name.clone(),
-            input_tokens: None,
-            output_tokens: None,
-            timed_out: output.timed_out,
-            exit_code: Some(output.exit_code),
-            session_id: None, // Generic CLI agents don't yet report session IDs
-        })
-    }
-
-    async fn health_check(&self) -> Result<()> {
-        let output = run_cmd(
-            &format!("{} --version", self.command),
-            &self.default_cwd,
-            Duration::from_secs(5),
-        )
-        .await?;
-
-        if output.success() {
-            Ok(())
-        } else {
-            anyhow::bail!("{} CLI not available", self.name)
-        }
-    }
-}
diff --git a/crates/thrum-runner/src/event_bus.rs b/crates/thrum-runner/src/event_bus.rs
index 3b94868..9acc01a 100644
--- a/crates/thrum-runner/src/event_bus.rs
+++ b/crates/thrum-runner/src/event_bus.rs
@@ -82,6 +82,7 @@ mod tests {
             agent_id: AgentId("agent-1".into()),
             task_id: TaskId(1),
             repo: RepoName::new("loom"),
+            task_title: "Test task".into(),
         });
 
         let event = rx.recv().await.unwrap();
diff --git a/crates/thrum-runner/src/git.rs b/crates/thrum-runner/src/git.rs
index e5d8cb6..cc895e6 100644
--- a/crates/thrum-runner/src/git.rs
+++ b/crates/thrum-runner/src/git.rs
@@ -1,6 +1,7 @@
 use anyhow::{Context, Result};
 use git2::{BranchType, MergeOptions, Repository, Signature};
 use std::path::Path;
+use std::time::SystemTime;
 
 /// Git operations on a repository using libgit2.
 pub struct GitRepo {
@@ -35,7 +36,9 @@ impl GitRepo {
     /// Create a new branch from the current HEAD.
     pub fn create_branch(&self, name: &str) -> Result<()> {
         let head_commit = self.repo.head()?.peel_to_commit()?;
-        self.repo.branch(name, &head_commit, false)?;
+        // force=true: if the branch already exists (e.g. from a previous run
+        // that was killed), reset it to current HEAD instead of failing.
+        self.repo.branch(name, &head_commit, true)?;
         // Checkout the new branch
         let refname = format!("refs/heads/{name}");
         let obj = self.repo.revparse_single(&refname)?;
@@ -44,14 +47,19 @@ impl GitRepo {
         Ok(())
     }
 
-    /// Create a branch ref without checking it out.
+    /// Create a branch ref without checking it out, or update it to HEAD if
+    /// it already exists.
     ///
     /// Used when creating worktrees: the branch must exist as a ref but must
     /// NOT be checked out in the main working directory, otherwise
     /// `git worktree add` will fail with "already used by worktree".
+    ///
+    /// Uses `force=true` so that existing branches (e.g. from a previous run)
+    /// are updated to the current HEAD instead of silently keeping a stale
+    /// commit pointer.
     pub fn create_branch_detached(&self, name: &str) -> Result<()> {
         let head_commit = self.repo.head()?.peel_to_commit()?;
-        self.repo.branch(name, &head_commit, false)?;
+        self.repo.branch(name, &head_commit, true)?;
         Ok(())
     }
 
@@ -101,6 +109,29 @@ impl GitRepo {
         Ok(revwalk.next().is_some())
     }
 
+    /// Get list of files changed on `branch` relative to the default branch.
+    /// Falls back to HEAD when `refs/heads/{branch}` cannot be resolved.
+    pub fn changed_files_on_branch(&self, branch: &str) -> Result<Vec<String>> {
+        let main = self.default_branch()?;
+        let main_ref = format!("refs/heads/{main}");
+        let main_tree = self.repo.revparse_single(&main_ref)?.peel_to_commit()?.tree()?;
+
+        // Diff the named branch rather than HEAD: the repo may be opened on the
+        // main worktree while the work lives on `branch`.
+        let branch_commit = match self.repo.revparse_single(&format!("refs/heads/{branch}")) {
+            Ok(obj) => obj.peel_to_commit()?,
+            Err(_) => self.repo.head()?.peel_to_commit()?,
+        };
+        let branch_tree = branch_commit.tree()?;
+
+        let diff = self
+            .repo
+            .diff_tree_to_tree(Some(&main_tree), Some(&branch_tree), None)?;
+
+        let paths = diff.deltas().filter_map(|delta| delta.new_file().path());
+        Ok(paths.map(|p| p.display().to_string()).collect())
+    }
+
     /// Get a diff summary between the default branch and HEAD.
     pub fn diff_summary(&self) -> Result<String> {
         let main = self.default_branch()?;
@@ -124,6 +155,43 @@ impl GitRepo {
         ))
     }
 
+    /// Get a diff summary between the default branch and a named branch.
+    ///
+    /// Unlike `diff_summary()` (which compares main vs HEAD), this compares
+    /// main vs a specific branch — essential when the git repo is opened
+    /// on the main worktree but we want stats for a task branch.
+    pub fn diff_summary_for_branch(&self, branch: &str) -> Result<String> {
+        let main = self.default_branch()?;
+        let main_ref = format!("refs/heads/{main}");
+        let branch_ref = format!("refs/heads/{branch}");
+        // Context covers both the ref lookup and the peel, so a missing ref names the branch.
+        let main_commit = self
+            .repo
+            .revparse_single(&main_ref)
+            .and_then(|obj| obj.peel_to_commit())
+            .with_context(|| format!("failed to resolve default branch '{main}'"))?;
+        let branch_commit = self
+            .repo
+            .revparse_single(&branch_ref)
+            .and_then(|obj| obj.peel_to_commit())
+            .with_context(|| format!("failed to resolve branch '{branch}'"))?;
+
+        let main_tree = main_commit.tree()?;
+        let branch_tree = branch_commit.tree()?;
+
+        let diff = self
+            .repo
+            .diff_tree_to_tree(Some(&main_tree), Some(&branch_tree), None)?;
+
+        let stats = diff.stats()?;
+        Ok(format!(
+            "{} files changed, {} insertions(+), {} deletions(-)",
+            stats.files_changed(),
+            stats.insertions(),
+            stats.deletions()
+        ))
+    }
+
     /// Get the full unified diff (patch) between the default branch and a named branch.
     ///
     /// Returns the diff as plain text in unified diff format, suitable for
@@ -264,11 +332,20 @@ impl GitRepo {
 
     /// Detect the default branch (main or master).
     fn default_branch(&self) -> Result<String> {
-        if self.repo.find_branch("main", BranchType::Local).is_ok() {
-            Ok("main".to_string())
-        } else {
-            Ok("master".to_string())
+        // Check local branches first
+        for name in &["main", "master"] {
+            if self.repo.find_branch(name, BranchType::Local).is_ok() {
+                return Ok(name.to_string());
+            }
         }
+        // In worktrees, local branch lookup can fail. Check refs directly.
+        for name in &["main", "master"] {
+            let refname = format!("refs/heads/{name}");
+            if self.repo.revparse_single(&refname).is_ok() {
+                return Ok(name.to_string());
+            }
+        }
+        anyhow::bail!("no default branch found (tried main, master)")
     }
 
     /// Get or create a signature for commits.
@@ -280,6 +357,94 @@ impl GitRepo {
     }
 }
 
+/// Git-independent probe: does `work_dir` hold a recently touched source file?
+///
+/// Meant as a last-resort fallback for when both `is_clean()` and
+/// `has_commits_beyond_main()` claim nothing changed. A stale or corrupted
+/// git index (e.g. after lock contention between concurrent agents) can hide
+/// real edits; scanning modification times on disk still catches them.
+///
+/// A file counts when its extension is listed below and its mtime lies within
+/// the last 24 hours. Returns `true` as soon as one such file is found and
+/// `false` otherwise. Never errors: any I/O failure simply yields `false`.
+pub fn has_modified_source_files(work_dir: &Path) -> bool {
+    // Extensions taken as evidence that the agent likely did real work.
+    const SOURCE_EXTENSIONS: &[&str] = &[
+        "rs", "toml", "md", "json", "yaml", "yml", "ts", "tsx", "js", "jsx", "py", "go", "c",
+        "cpp", "h", "hpp", "java", "kt", "swift", "sh", "css", "html", "sql", "lock",
+    ];
+    const LOOKBACK: std::time::Duration = std::time::Duration::from_secs(24 * 3600);
+
+    // `checked_sub` yields `None` when the result is not representable as a
+    // `SystemTime`; in that case report "nothing found" instead of failing.
+    let Some(threshold) = SystemTime::now().checked_sub(LOOKBACK) else {
+        return false;
+    };
+    walk_for_recent_sources(work_dir, &threshold, SOURCE_EXTENSIONS, 0)
+}
+
+/// Depth-limited recursive scan for a source file modified after `cutoff`.
+/// Symlinked directories are not followed, so the scan stays inside `dir`.
+fn walk_for_recent_sources(
+    dir: &Path,
+    cutoff: &SystemTime,
+    extensions: &[&str],
+    depth: usize,
+) -> bool {
+    // Recursion bound: deeply nested trees are abandoned rather than walked.
+    const MAX_DEPTH: usize = 8;
+    // Entry names (files or directories) that never count as agent work.
+    const SKIPPED_NAMES: &[&str] = &["target", "node_modules", "__pycache__"];
+
+    if depth > MAX_DEPTH {
+        return false;
+    }
+    let Ok(entries) = std::fs::read_dir(dir) else {
+        return false;
+    };
+
+    for entry in entries.flatten() {
+        let name = entry.file_name();
+        let name_str = name.to_string_lossy();
+        // Hidden entries (`.git`, dotfiles) and build/cache output are ignored.
+        if name_str.starts_with('.') || SKIPPED_NAMES.iter().any(|skip| name_str == *skip) {
+            continue;
+        }
+
+        // `DirEntry::file_type` does not follow symlinks, unlike `Path::is_dir`:
+        // a link pointing outside the worktree (or back at an ancestor) is
+        // never descended into, and classifying usually needs no extra syscall.
+        let Ok(kind) = entry.file_type() else {
+            continue;
+        };
+        let path = entry.path();
+        if kind.is_dir() {
+            if walk_for_recent_sources(&path, cutoff, extensions, depth + 1) {
+                return true;
+            }
+            continue;
+        }
+
+        // Cheap extension filter first; only matching names cost a `stat`.
+        let ext_match = path
+            .extension()
+            .and_then(|e| e.to_str())
+            .is_some_and(|ext| extensions.contains(&ext));
+        // `Path::metadata` follows symlinks, so a linked regular file still
+        // counts, while a link to a directory fails the `is_file` test.
+        if ext_match
+            && let Ok(meta) = path.metadata()
+            && meta.is_file()
+            && let Ok(mtime) = meta.modified()
+            && mtime > *cutoff
+        {
+            tracing::debug!(path = %path.display(), "found recently modified source file");
+            return true;
+        }
+    }
+    false
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -387,4 +552,82 @@ mod tests {
         assert!(committed);
         assert!(!lock_path.exists());
     }
+
+    #[test]
+    fn create_branch_detached_updates_existing_branch_to_head() {
+        const BRANCH: &str = "feature-x";
+        let (dir, git) = init_test_repo();
+        let root = dir.path();
+
+        // First call: the branch is created at the repo's initial commit.
+        git.create_branch_detached(BRANCH).unwrap();
+        let sha_before = git.head_sha().unwrap();
+
+        // Move HEAD forward by committing a new file.
+        std::fs::write(root.join("new.txt"), "content").unwrap();
+        git_in(root, &["add", "."]);
+        git_in(root, &["commit", "-m", "second"]);
+        let sha_after = git.head_sha().unwrap();
+        assert_ne!(sha_before, sha_after);
+
+        // Second call: the existing ref must be moved to the new HEAD rather
+        // than being left at the commit it was first created on.
+        git.create_branch_detached(BRANCH).unwrap();
+
+        // Resolve the branch ref directly and compare its target with HEAD.
+        let reference = git.repo.find_branch(BRANCH, BranchType::Local).unwrap();
+        let tip = reference.get().target().unwrap().to_string();
+        assert_eq!(tip, sha_after);
+    }
+
+    #[test]
+    fn has_modified_source_files_detects_recent_source_files() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+
+        // A freshly created, empty directory has nothing to report.
+        assert!(!has_modified_source_files(root));
+
+        // `.txt` is not a tracked source extension, so it must be ignored.
+        std::fs::write(root.join("readme.txt"), "hello").unwrap();
+        assert!(!has_modified_source_files(root));
+
+        // A just-written `.rs` file has an mtime of "now" and must be found.
+        std::fs::write(root.join("lib.rs"), "fn main() {}").unwrap();
+        assert!(has_modified_source_files(root));
+    }
+
+    #[test]
+    fn has_modified_source_files_ignores_hidden_and_target_dirs() {
+        let tmp = tempfile::tempdir().unwrap();
+        let root = tmp.path();
+        // A fresh `.rs` file under `.git/` (hidden) or `target/` must not count.
+        for (skipped, file, body) in [
+            (".git", "config.rs", "fn git() {}"),
+            ("target", "build.rs", "fn build() {}"),
+        ] {
+            let nested = root.join(skipped);
+            std::fs::create_dir_all(&nested).unwrap();
+            std::fs::write(nested.join(file), body).unwrap();
+            assert!(!has_modified_source_files(root));
+        }
+    }
+
+    #[test]
+    fn has_modified_source_files_detects_nested_source_files() {
+        let tmp = tempfile::tempdir().unwrap();
+        // Two levels below the root: the walker has to recurse to see it.
+        let nested = tmp.path().join("src/core");
+        std::fs::create_dir_all(&nested).unwrap();
+        std::fs::write(nested.join("main.rs"), "fn main() {}").unwrap();
+
+        assert!(has_modified_source_files(tmp.path()));
+    }
+
+    #[test]
+    fn has_modified_source_files_nonexistent_dir() {
+        // A missing directory must yield `false`, never a panic.
+        let missing = Path::new("/nonexistent/path/that/does/not/exist");
+        assert!(!has_modified_source_files(missing));
+    }
 }
diff --git a/crates/thrum-runner/src/lib.rs b/crates/thrum-runner/src/lib.rs
index 67176f1..5e05013 100644
--- a/crates/thrum-runner/src/lib.rs
+++ b/crates/thrum-runner/src/lib.rs
@@ -1,14 +1,13 @@
-pub mod anthropic;
-pub mod backend;
-pub mod claude;
-pub mod cli_agent;
+pub mod ci;
+pub mod claude_code;
 pub mod coordination_hub;
 pub mod event_bus;
 pub mod git;
-pub mod openai_compat;
 pub mod parallel;
 pub mod sandbox;
 pub mod session_export;
+pub mod shutdown;
 pub mod subprocess;
+pub mod sync;
 pub mod watcher;
 pub mod worktree;
diff --git a/crates/thrum-runner/src/openai_compat.rs b/crates/thrum-runner/src/openai_compat.rs
deleted file mode 100644
index 2937d0d..0000000
--- a/crates/thrum-runner/src/openai_compat.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-//! OpenAI-compatible API backend for Mistral/Devstral2 and other providers.
-//!
-//! Uses `async-openai` pointed at any OpenAI-compatible endpoint.
-//! Chat-only: returns text, cannot edit files or run commands.
-//!
-//! Supported providers:
-//! - Mistral AI (Devstral2): `https://api.mistral.ai/v1`
-//! - OpenAI: `https://api.openai.com/v1`
-//! - Any OpenAI-compatible server (vLLM, Ollama, etc.)
-
-use crate::backend::{AiBackend, AiRequest, AiResponse, BackendCapability};
-use anyhow::{Context, Result};
-use async_openai::Client;
-use async_openai::config::OpenAIConfig;
-use async_openai::types::{
-    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestUserMessage, CreateChatCompletionRequestArgs,
-};
-use async_trait::async_trait;
-
-/// Well-known provider presets.
-pub enum Provider {
-    /// Mistral AI (Devstral2, Codestral, etc.)
-    Mistral,
-    /// OpenAI (GPT-4o, etc.)
-    OpenAi,
-    /// Custom endpoint.
-    Custom { base_url: String },
-}
-
-impl Provider {
-    fn base_url(&self) -> &str {
-        match self {
-            Provider::Mistral => "https://api.mistral.ai/v1",
-            Provider::OpenAi => "https://api.openai.com/v1",
-            Provider::Custom { base_url } => base_url,
-        }
-    }
-
-    fn env_key_name(&self) -> &str {
-        match self {
-            Provider::Mistral => "MISTRAL_API_KEY",
-            Provider::OpenAi => "OPENAI_API_KEY",
-            Provider::Custom { .. } => "OPENAI_API_KEY",
-        }
-    }
-}
-
-/// OpenAI-compatible chat backend.
-pub struct OpenAiCompatBackend {
-    client: Client<OpenAIConfig>,
-    provider_name: String,
-    model: String,
-    max_tokens: u16,
-}
-
-impl OpenAiCompatBackend {
-    /// Create with explicit API key.
-    pub fn new(provider: Provider, api_key: String, model: String) -> Self {
-        let config = OpenAIConfig::new()
-            .with_api_base(provider.base_url())
-            .with_api_key(&api_key);
-
-        let provider_name = match &provider {
-            Provider::Mistral => "mistral".to_string(),
-            Provider::OpenAi => "openai".to_string(),
-            Provider::Custom { base_url } => format!("custom({base_url})"),
-        };
-
-        Self {
-            client: Client::with_config(config),
-            provider_name,
-            model,
-            max_tokens: 4096,
-        }
-    }
-
-    /// Create from environment variable.
-    pub fn from_env(provider: Provider, model: &str) -> Result<Self> {
-        let env_key = provider.env_key_name();
-        let api_key = std::env::var(env_key).context(format!("{env_key} not set"))?;
-        Ok(Self::new(provider, api_key, model.to_string()))
-    }
-
-    /// Convenience: create a Mistral/Devstral2 backend.
-    pub fn devstral(api_key: String) -> Self {
-        Self::new(Provider::Mistral, api_key, "devstral-small-2505".into())
-    }
-
-    pub fn with_max_tokens(mut self, max_tokens: u16) -> Self {
-        self.max_tokens = max_tokens;
-        self
-    }
-}
-
-#[async_trait]
-impl AiBackend for OpenAiCompatBackend {
-    fn name(&self) -> &str {
-        &self.provider_name
-    }
-
-    fn capability(&self) -> BackendCapability {
-        BackendCapability::Chat
-    }
-
-    fn model(&self) -> &str {
-        &self.model
-    }
-
-    async fn invoke(&self, request: &AiRequest) -> Result<AiResponse> {
-        let mut messages: Vec<ChatCompletionRequestMessage> = Vec::new();
-
-        if let Some(ref sys) = request.system_prompt {
-            messages.push(ChatCompletionRequestMessage::System(
-                ChatCompletionRequestSystemMessage::from(sys.as_str()),
-            ));
-        }
-
-        messages.push(ChatCompletionRequestMessage::User(
-            ChatCompletionRequestUserMessage::from(request.prompt.as_str()),
-        ));
-
-        let max_tokens = request.max_tokens.unwrap_or(self.max_tokens as u32);
-
-        let mut req_builder = CreateChatCompletionRequestArgs::default();
-        req_builder
-            .model(&self.model)
-            .messages(messages)
-            .max_tokens(max_tokens);
-
-        if let Some(temp) = request.temperature {
-            req_builder.temperature(temp);
-        }
-
-        let api_request = req_builder
-            .build()
-            .context("failed to build chat completion request")?;
-
-        tracing::info!(
-            provider = %self.provider_name,
-            model = %self.model,
-            prompt_len = request.prompt.len(),
-            "invoking OpenAI-compatible API"
-        );
-
-        let response = self
-            .client
-            .chat()
-            .create(api_request)
-            .await
-            .context("OpenAI-compatible API call failed")?;
-
-        let content = response
-            .choices
-            .first()
-            .and_then(|c| c.message.content.as_deref())
-            .unwrap_or("")
-            .to_string();
-
-        let (input_tokens, output_tokens) = response
-            .usage
-            .map(|u| {
-                (
-                    Some(u.prompt_tokens as u64),
-                    Some(u.completion_tokens as u64),
-                )
-            })
-            .unwrap_or((None, None));
-
-        Ok(AiResponse {
-            content,
-            model: response.model,
-            input_tokens,
-            output_tokens,
-            timed_out: false,
-            exit_code: None,
-            session_id: None,
-        })
-    }
-
-    async fn health_check(&self) -> Result<()> {
-        // List models as a basic connectivity check
-        let _models = self
-            .client
-            .models()
-            .list()
-            .await
-            .context("failed to list models — check API key and endpoint")?;
-        Ok(())
-    }
-}
diff --git a/crates/thrum-runner/src/parallel.rs b/crates/thrum-runner/src/parallel.rs
index bf64206..c86127e 100644
--- a/crates/thrum-runner/src/parallel.rs
+++ b/crates/thrum-runner/src/parallel.rs
@@ -6,14 +6,13 @@
 //! - Atomic task claiming via redb single-writer transactions
 //! - Graceful shutdown via CancellationToken
 
-use crate::backend::BackendRegistry;
 use crate::coordination_hub::CoordinationHub;
 use crate::event_bus::EventBus;
 use anyhow::{Context, Result};
 use std::collections::HashMap;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use thrum_core::agent::{AgentId, AgentSession};
 use thrum_core::budget::BudgetTracker;
 use thrum_core::coordination::ConflictPolicy;
@@ -56,7 +55,6 @@ pub struct PipelineContext {
     pub db: Arc<redb::Database>,
     pub repos_config: Arc<ReposConfig>,
     pub agents_dir: PathBuf,
-    pub registry: Arc<BackendRegistry>,
     pub session_budget_usd: Option<f64>,
     /// Shared budget tracker for global spending enforcement.
     /// Protected by a mutex for thread-safe concurrent access.
@@ -77,6 +75,75 @@ pub struct PipelineContext {
     pub coordination: CoordinationHub,
     /// Policy for handling file conflicts between concurrent agents.
     pub conflict_policy: ConflictPolicy,
+    /// Process tracker for graceful shutdown of spawned agent subprocesses.
+    pub process_tracker: crate::shutdown::ProcessTracker,
+    /// Per-repo cooldown tracker: prevents dispatching tasks to a repo
+    /// that has recently experienced consecutive failures.
+    pub repo_cooldowns: RepoCooldownTracker,
+}
+
+/// Per-repo failure backoff that keeps the engine from hammering a broken repo.
+///
+/// Each recorded failure bumps a consecutive-failure counter for its repo, and
+/// dispatch to that repo is deferred until an escalating cooldown (60s, 120s,
+/// then 180s for three or more failures) has elapsed. A systemic problem such
+/// as a broken dependency therefore cannot burn through every task's retries.
+#[derive(Clone, Default)]
+pub struct RepoCooldownTracker {
+    inner: Arc<Mutex<HashMap<String, RepoCooldownState>>>,
+}
+
+/// Bookkeeping for one repo: when it last failed and how many times in a row.
+struct RepoCooldownState {
+    last_failure: Instant,
+    consecutive_failures: u32,
+}
+
+/// Cooldown added per consecutive failure, in seconds.
+const REPO_COOLDOWN_BASE_SECS: u64 = 60;
+
+impl RepoCooldownTracker {
+    /// Create a tracker with no repos in cooldown.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record a failed task for `repo`: bump its consecutive-failure count and
+    /// restart its cooldown window from now.
+    pub async fn record_failure(&self, repo: &str) {
+        let mut map = self.inner.lock().await;
+        let now = Instant::now();
+        let state = map.entry(repo.to_string()).or_insert(RepoCooldownState {
+            last_failure: now,
+            consecutive_failures: 0,
+        });
+        // Saturate instead of overflowing; only the first few failures matter.
+        state.consecutive_failures = state.consecutive_failures.saturating_add(1);
+        state.last_failure = now;
+    }
+
+    /// Forget `repo`'s failure history after one of its tasks succeeds.
+    pub async fn record_success(&self, repo: &str) {
+        self.inner.lock().await.remove(repo);
+    }
+
+    /// Report whether `repo` is still cooling down.
+    ///
+    /// Returns `Some(remaining_secs)` while dispatch should be deferred, or
+    /// `None` when the repo has no recorded failures or its cooldown is over.
+    pub async fn check_cooldown(&self, repo: &str) -> Option<u64> {
+        let map = self.inner.lock().await;
+        let state = map.get(repo)?;
+
+        // 1×base, 2×base, then 3×base for three or more failures (max 180s with
+        // the current base); the 300s ceiling only bites if the base is raised.
+        let multiplier = u64::from(state.consecutive_failures.clamp(1, 3));
+        let cooldown_secs = (multiplier * REPO_COOLDOWN_BASE_SECS).min(300);
+        let elapsed = state.last_failure.elapsed().as_secs();
+
+        // Whole seconds only: `None` as soon as `elapsed` reaches the cooldown.
+        cooldown_secs.checked_sub(elapsed).filter(|left| *left > 0)
+    }
 }
 
 /// Result of a single agent run.
@@ -132,10 +199,14 @@ pub async fn run_parallel(
         ),
     });
 
-    // Recover stuck tasks from a previous engine run.
-    // Tasks in "claimed", "implementing", or "integrating" state with no
-    // corresponding agent are orphaned — reset them to a dispatchable state.
-    recover_stuck_tasks(&ctx.db, &ctx.event_bus)?;
+    // Run comprehensive startup recovery: kill orphaned processes, clean
+    // stale worktrees, reset stuck tasks, check repos for leaked changes.
+    crate::shutdown::run_startup_recovery(
+        &ctx.db,
+        &ctx.event_bus,
+        &ctx.worktrees_dir,
+        &ctx.repos_config,
+    )?;
 
     loop {
         if shutdown.is_cancelled() {
@@ -148,6 +219,10 @@ pub async fn run_parallel(
             reap_agent_result(result, &ctx.event_bus);
         }
 
+        // Process AwaitingCI tasks: poll their CI status and handle pass/fail.
+        // This runs each iteration but tasks self-manage their polling interval.
+        let ci_dispatched = dispatch_ci_tasks(&ctx, repo_filter.as_ref(), &mut join_set).await?;
+
         // Dispatch batch: try to claim and spawn agents
         let dispatched = dispatch_batch(
             &ctx,
@@ -159,12 +234,14 @@ pub async fn run_parallel(
         )
         .await?;
 
-        if dispatched == 0 && join_set.is_empty() {
+        let total_dispatched = dispatched + ci_dispatched;
+
+        if total_dispatched == 0 && join_set.is_empty() {
             tracing::info!("no tasks to dispatch and no agents in flight, exiting");
             break;
         }
 
-        if dispatched == 0 {
+        if total_dispatched == 0 {
             // Nothing new to dispatch; wait for an agent to finish or poll interval
             tokio::select! {
                 _ = shutdown.cancelled() => {
@@ -179,15 +256,22 @@ pub async fn run_parallel(
         }
     }
 
-    // Graceful drain: give in-flight agents a short window to finish,
-    // then abort them. Without this, Ctrl+C blocks for 20+ minutes
-    // waiting for long-running Claude invocations to complete.
+    // Graceful shutdown: kill agent child processes, then give tokio tasks
+    // a window to finish. This is a two-phase approach:
+    //   Phase 1: SIGTERM all tracked agent PIDs (claude -p processes), wait 30s, SIGKILL
+    //   Phase 2: Abort remaining tokio tasks (should be fast since children are dead)
     if !join_set.is_empty() {
+        let inflight = join_set.len();
         tracing::info!(
-            count = join_set.len(),
-            "waiting up to 10s for in-flight agents to complete (Ctrl+C again to force quit)"
+            count = inflight,
+            "shutting down: killing agent processes and draining tasks"
         );
-        let drain_deadline = tokio::time::sleep(Duration::from_secs(10));
+
+        // Phase 1: Kill tracked child processes with SIGTERM → 30s → SIGKILL.
+        ctx.process_tracker.kill_all(Duration::from_secs(30)).await;
+
+        // Phase 2: Wait briefly for tokio tasks to notice their children died.
+        let drain_deadline = tokio::time::sleep(Duration::from_secs(5));
         tokio::pin!(drain_deadline);
         loop {
             tokio::select! {
@@ -200,7 +284,7 @@ pub async fn run_parallel(
                 _ = &mut drain_deadline => {
                     tracing::warn!(
                         remaining = join_set.len(),
-                        "drain timeout — aborting remaining agents"
+                        "drain timeout — aborting remaining tokio tasks"
                     );
                     join_set.abort_all();
                     // Collect the abort results
@@ -254,6 +338,17 @@ pub async fn run_parallel(
         }
     }
 
+    // Run shutdown cleanup: reset in-flight tasks, clean worktrees, check repos.
+    crate::shutdown::run_shutdown_cleanup(
+        &ctx.db,
+        &ctx.event_bus,
+        &ctx.process_tracker,
+        &ctx.worktrees_dir,
+        &ctx.repos_config,
+        Duration::from_secs(5), // Extra grace for any stragglers
+    )
+    .await;
+
     tracing::info!("parallel engine stopped");
     ctx.event_bus.emit(EventKind::EngineLog {
         level: thrum_core::event::LogLevel::Info,
@@ -295,6 +390,85 @@ fn reap_agent_result(result: Result<AgentResult, tokio::task::JoinError>, event_
     }
 }
 
+/// Spawn a CI polling loop for every task currently in AwaitingCI status.
+///
+/// A task is skipped when it does not match `repo_filter`, when its repo has
+/// no entry in the repos config, or when CI is not enabled for that repo.
+/// Pollers are spawned onto `join_set` without taking an agent semaphore
+/// permit. Returns the number of pollers spawned, or the task-store error
+/// if listing tasks fails.
+///
+/// NOTE(review): nothing visible here stops a second poller being spawned for
+/// a task that is still AwaitingCI on the next engine iteration — confirm that
+/// `run_ci_loop` or the task's status transition guards against duplicates.
+async fn dispatch_ci_tasks(
+    ctx: &Arc<PipelineContext>,
+    repo_filter: Option<&RepoName>,
+    join_set: &mut JoinSet<AgentResult>,
+) -> Result<usize> {
+    let task_store = TaskStore::new(&ctx.db);
+    let all_tasks = task_store.list(None, None)?;
+    let mut dispatched = 0;
+
+    for task in all_tasks {
+        if !task.status.is_awaiting_ci() {
+            continue;
+        }
+
+        // Honour the optional single-repo restriction.
+        if let Some(filter) = repo_filter
+            && &task.repo != filter
+        {
+            continue;
+        }
+
+        // Only repos that are configured and have CI switched on get a poller.
+        let Some(repo_config) = ctx.repos_config.get(&task.repo) else {
+            continue;
+        };
+        if !repo_config.ci.as_ref().is_some_and(|ci| ci.enabled) {
+            continue;
+        }
+
+        // Wrap the poller in a regular agent session so its result is reaped
+        // from `join_set` like any other agent's.
+        let repo_path = repo_config.path.clone();
+        let mut session = AgentSession::new(
+            AgentId(format!("ci-poller-{}", task.id)),
+            task.id.clone(),
+            task.repo.clone(),
+            repo_path.clone(),
+        );
+
+        tracing::info!(
+            task_id = %task.id,
+            "dispatching CI polling task"
+        );
+
+        // The spawned future owns one `Arc` to the context and borrows the db,
+        // event bus, agents dir and roles from it instead of cloning each.
+        let poller_ctx = Arc::clone(ctx);
+        join_set.spawn(async move {
+            let task_store = TaskStore::new(&poller_ctx.db);
+            let outcome = crate::ci::run_ci_loop(
+                &task_store,
+                &poller_ctx.event_bus,
+                &repo_path,
+                &poller_ctx.agents_dir,
+                poller_ctx.roles.as_deref(),
+                task,
+            )
+            .await;
+            session.finish();
+            AgentResult { session, outcome }
+        });
+
+        dispatched += 1;
+    }
+
+    Ok(dispatched)
+}
+
 /// Try to dispatch agents for each claim category in priority order.
 ///
 /// Returns the number of agents spawned this batch.
@@ -319,6 +493,13 @@ async fn dispatch_batch(
     let mut dispatched = 0;
     let use_worktrees = config.per_repo_limit > 1;
 
+    // Compute completed task IDs for dependency checking.
+    let task_store = TaskStore::new(&ctx.db);
+    let completed_ids = task_store.completed_task_ids()?;
+
+    // Emit predicted conflict warnings for tasks that are about to be dispatched.
+    emit_predicted_conflicts(&task_store, &ctx.event_bus, repo_filter)?;
+
     for &category in &categories {
         loop {
             // Check global capacity
@@ -327,9 +508,14 @@ async fn dispatch_batch(
                 Err(_) => break, // At capacity
             };
 
-            // Try to claim a task
+            // Try to claim a task, respecting dependency ordering
             let task_store = TaskStore::new(&ctx.db);
-            let claimed = task_store.claim_next("pre-dispatch", category, repo_filter)?;
+            let claimed = task_store.claim_next_with_deps(
+                "pre-dispatch",
+                category,
+                repo_filter,
+                &completed_ids,
+            )?;
 
             let task = match claimed {
                 Some(t) => t,
@@ -356,6 +542,27 @@ async fn dispatch_batch(
                 }
             };
 
+            // Check per-repo cooldown: if this repo has consecutive failures,
+            // skip dispatching until the cooldown period expires.
+            if let Some(remaining_secs) = ctx.repo_cooldowns.check_cooldown(&repo_key).await {
+                tracing::info!(
+                    repo = %repo_key,
+                    remaining_secs,
+                    "repo in cooldown after consecutive failures, deferring dispatch"
+                );
+                ctx.event_bus.emit(EventKind::EngineLog {
+                    level: thrum_core::event::LogLevel::Info,
+                    message: format!(
+                        "repo '{}' in cooldown ({}s remaining) — deferring task {}",
+                        repo_key, remaining_secs, task.id
+                    ),
+                });
+                unclaim_task(&ctx.db, &task, category)?;
+                drop(repo_permit);
+                drop(global_permit);
+                break;
+            }
+
             // Generate agent ID and spawn
             let agent_id = AgentId::generate(&task.repo, &task.id);
             let repo_config = ctx
@@ -373,7 +580,13 @@ async fn dispatch_batch(
                 // Use create_branch_detached to avoid checking out the branch
                 // in the main working directory — git won't allow the same branch
                 // to be checked out in two worktrees simultaneously.
-                let _ = git.create_branch_detached(&branch);
+                if let Err(e) = git.create_branch_detached(&branch) {
+                    tracing::warn!(
+                        branch,
+                        error = %e,
+                        "failed to create/update branch ref — worktree may use stale code"
+                    );
+                }
 
                 let wt = git.create_worktree(&branch, &ctx.worktrees_dir)?;
                 let path = wt.path.clone();
@@ -421,6 +634,7 @@ async fn dispatch_batch(
                 agent_id: agent_id.clone(),
                 task_id: task.id.clone(),
                 repo: task.repo.clone(),
+                task_title: task.title.clone(),
             });
 
             let ctx = Arc::clone(ctx);
@@ -494,6 +708,74 @@ async fn run_agent_task(
     // or main repo path (single-agent mode).
     let work_dir = worktree.map(|wt| wt.path.clone());
 
+    // Set up seatbelt sandbox for macOS.
+    //
+    // "os-native": enforce the seatbelt profile (wraps agent with sandbox-exec).
+    // "observe":   run without enforcement, but write the profile and audit
+    //              filesystem writes after execution to log would-be violations.
+    let sandbox_backend = ctx
+        .sandbox_config
+        .as_ref()
+        .map(|s| s.backend.as_str())
+        .unwrap_or("none");
+    let observe_mode = sandbox_backend == "observe";
+
+    let effective_dir = work_dir
+        .clone()
+        .or_else(|| ctx.repos_config.get(&task.repo).map(|rc| rc.path.clone()))
+        .unwrap_or_else(|| std::env::current_dir().unwrap_or_default());
+    let task_slug = format!("TASK-{:04}", task.id.0);
+
+    // Create scratch dir for both os-native and observe modes.
+    let scratch_dir =
+        if cfg!(target_os = "macos") && (sandbox_backend == "os-native" || observe_mode) {
+            crate::sandbox::create_scratch_dir(&ctx.worktrees_dir, &task_slug).ok()
+        } else {
+            None
+        };
+
+    let sandbox_profile = if cfg!(target_os = "macos") && sandbox_backend == "os-native" {
+        if let Some(ref scratch) = scratch_dir {
+            match crate::sandbox::write_seatbelt_profile(&effective_dir, scratch) {
+                Ok(profile) => {
+                    tracing::info!(
+                        task_id = %task.id,
+                        profile = %profile.display(),
+                        scratch = %scratch.display(),
+                        "seatbelt sandbox enabled for agent"
+                    );
+                    Some(profile)
+                }
+                Err(e) => {
+                    tracing::warn!(error = %e, "failed to write seatbelt profile, running unsandboxed");
+                    None
+                }
+            }
+        } else {
+            None
+        }
+    } else if observe_mode {
+        // Write the profile for reference but don't enforce it.
+        if let Some(ref scratch) = scratch_dir {
+            match crate::sandbox::write_seatbelt_profile(&effective_dir, scratch) {
+                Ok(profile) => {
+                    tracing::info!(
+                        task_id = %task.id,
+                        profile = %profile.display(),
+                        "sandbox OBSERVE mode: profile written for reference (not enforced)"
+                    );
+                }
+                Err(e) => {
+                    tracing::debug!(error = %e, "observe mode: could not write reference profile");
+                }
+            }
+        }
+        // Return None so the agent runs without sandbox-exec.
+        None
+    } else {
+        None
+    };
+
     // Start file watcher for real-time change detection
     let agent_id = AgentId::generate(&task.repo, &task.id);
     let repo_config = ctx.repos_config.get(&task.repo);
@@ -517,6 +799,10 @@ async fn run_agent_task(
         None
     };
 
+    // Capture identifiers before `task` is consumed by the pipeline.
+    let task_id = task.id.clone();
+    let task_repo = task.repo.to_string();
+
     let result = match category {
         ClaimCategory::RetryableFailed => {
             crate::parallel::pipeline::retry_task_pipeline(
@@ -524,7 +810,6 @@ async fn run_agent_task(
                 &gate_store,
                 &ctx.repos_config,
                 &ctx.agents_dir,
-                &ctx.registry,
                 &ctx.event_bus,
                 &ctx.budget,
                 ctx.subsample.as_ref(),
@@ -551,7 +836,6 @@ async fn run_agent_task(
                 &gate_store,
                 &ctx.repos_config,
                 &ctx.agents_dir,
-                &ctx.registry,
                 roles_ref,
                 &ctx.event_bus,
                 &ctx.budget,
@@ -568,91 +852,162 @@ async fn run_agent_task(
         w.stop().await;
     }
 
-    result
-}
-
-/// Recover tasks stuck in transient states from a previous engine run.
-///
-/// On engine startup, any tasks in "claimed", "implementing", or "integrating"
-/// state are orphaned (their agent is no longer running). This function resets
-/// them to a re-dispatchable state so they don't stay stuck forever.
-fn recover_stuck_tasks(db: &redb::Database, event_bus: &crate::event_bus::EventBus) -> Result<()> {
-    let task_store = TaskStore::new(db);
-    let all_tasks = task_store.list(None, None)?;
-    let mut recovered = 0;
-
-    for mut task in all_tasks {
-        let reset_to = match &task.status {
-            thrum_core::task::TaskStatus::Claimed { .. }
-            | thrum_core::task::TaskStatus::Implementing { .. } => {
-                // Agent was working on this but the engine stopped.
-                // Reset to Pending so it gets re-dispatched.
-                Some(thrum_core::task::TaskStatus::Pending)
-            }
-            thrum_core::task::TaskStatus::Integrating => {
-                // Post-approval integration was in progress.
-                // Reset to Approved so it re-enters the integration path.
-                Some(thrum_core::task::TaskStatus::Approved)
-            }
-            thrum_core::task::TaskStatus::Reviewing { .. } => {
-                // Review was in progress — implementation is done, just re-run review.
-                // Reset to Pending to run the full pipeline again (safe, gates will catch issues).
-                Some(thrum_core::task::TaskStatus::Pending)
-            }
-            _ => None,
-        };
-
-        if let Some(new_status) = reset_to {
-            let old_label = task.status.label().to_string();
-            let new_label = new_status.label();
+    // Observe mode: audit filesystem writes for would-be violations.
+    if observe_mode {
+        let audit_dir = work_dir.as_ref().unwrap_or(&effective_dir);
+        let scratch = scratch_dir
+            .as_ref()
+            .cloned()
+            .unwrap_or_else(|| ctx.worktrees_dir.join("scratch").join(&task_slug));
+        let violations = crate::sandbox::audit_observe_violations(audit_dir, &scratch);
+        if !violations.is_empty() {
             tracing::warn!(
-                task_id = %task.id,
-                from = old_label,
-                to = new_label,
-                "recovering stuck task from previous engine run"
+                task_id = %task_slug,
+                count = violations.len(),
+                "sandbox observe: {} write(s) would be denied under enforcement",
+                violations.len()
             );
-            task.status = new_status;
-            task.updated_at = chrono::Utc::now();
-            task_store.update(&task)?;
-            recovered += 1;
+        }
+    }
 
-            event_bus.emit(EventKind::TaskStateChange {
-                task_id: task.id.clone(),
-                repo: task.repo.clone(),
-                from: old_label,
-                to: task.status.label().to_string(),
-            });
+    // Clean up the seatbelt profile temp file.
+    if let Some(ref profile) = sandbox_profile
+        && let Err(e) = std::fs::remove_file(profile)
+    {
+        tracing::debug!(error = %e, "seatbelt profile cleanup (non-fatal)");
+    }
+
+    // Update per-repo cooldown based on the pipeline outcome.
+    // Re-read the task from the store to see its final status.
+    if let Ok(Some(final_task)) = task_store.get(&task_id) {
+        if final_task.status.is_claimable_retry() {
+            ctx.repo_cooldowns.record_failure(&task_repo).await;
+            tracing::info!(
+                task_id = %task_id,
+                repo = %task_repo,
+                "recorded repo cooldown failure"
+            );
+        } else if final_task.status.is_terminal()
+            || final_task.status.is_reviewable()
+            || final_task.status.is_awaiting_ci()
+        {
+            ctx.repo_cooldowns.record_success(&task_repo).await;
         }
     }
 
-    if recovered > 0 {
-        tracing::info!(count = recovered, "recovered stuck tasks");
-        event_bus.emit(EventKind::EngineLog {
-            level: thrum_core::event::LogLevel::Info,
-            message: format!("recovered {recovered} stuck tasks from previous run"),
+    result
+}
+
+/// Emit a `PredictedConflictDetected` event per conflict among pending tasks.
+///
+/// Only tasks listed as "pending" (optionally narrowed by `repo_filter`) are
+/// checked; a single warning is logged when any conflict is found.
+fn emit_predicted_conflicts(
+    task_store: &TaskStore,
+    event_bus: &EventBus,
+    repo_filter: Option<&RepoName>,
+) -> Result<()> {
+    let tasks = task_store.list(Some("pending"), repo_filter)?; // only fallible step
+    let conflicts = thrum_core::dependency::predict_conflicts(&tasks);
+
+    for conflict in &conflicts {
+        event_bus.emit(EventKind::PredictedConflictDetected {
+            task_a: conflict.task_a.clone(),
+            task_b: conflict.task_b.clone(),
+            path: conflict.path.clone(),
+            severity: conflict.severity.to_string(),
         });
     }
 
+    if !conflicts.is_empty() {
+        tracing::warn!(
+            count = conflicts.len(),
+            "predicted file conflicts between pending tasks"
+        );
+    }
+
     Ok(())
 }
 
+/// Run a post-merge compilation check for a repository.
+///
+/// Runs the repo's `build_cmd` via `sh -c` in the repo path so a merge that
+/// breaks the build is caught early. Emits `PostMergeCheckCompleted` and
+/// returns `Ok(passed)`; errors only if config is missing or the spawn fails.
+pub async fn run_post_merge_check(
+    repos_config: &ReposConfig,
+    repo_name: &RepoName,
+    after_batch: Option<&str>,
+    event_bus: &EventBus,
+) -> Result<bool> {
+    let repo_config = repos_config
+        .get(repo_name)
+        .with_context(|| format!("no config for repo {repo_name}"))?;
+
+    let start = std::time::Instant::now();
+    let build_cmd = &repo_config.build_cmd;
+
+    tracing::info!(
+        repo = %repo_name,
+        command = %build_cmd,
+        "running post-merge compilation check"
+    );
+
+    let output = tokio::process::Command::new("sh")
+        .arg("-c")
+        .arg(build_cmd.as_str())
+        .current_dir(&repo_config.path)
+        .output()
+        .await
+        .context("failed to run post-merge build")?;
+
+    let duration_secs = start.elapsed().as_secs_f64();
+    let passed = output.status.success();
+
+    event_bus.emit(EventKind::PostMergeCheckCompleted {
+        repo: repo_name.clone(),
+        passed,
+        after_batch: after_batch.map(String::from),
+        duration_secs,
+    });
+    // Failure logs carry at most the first 500 characters of stderr.
+    if !passed {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        tracing::error!(
+            repo = %repo_name,
+            duration = duration_secs,
+            "post-merge compilation check FAILED: {}",
+            stderr.chars().take(500).collect::<String>()
+        );
+    } else {
+        tracing::info!(
+            repo = %repo_name,
+            duration = duration_secs,
+            "post-merge compilation check passed"
+        );
+    }
+
+    Ok(passed)
+}
+
 /// Pipeline functions extracted for sharing between sequential and parallel paths.
 pub mod pipeline {
-    use crate::backend::{AiBackend, AiRequest, AiResponse, BackendRegistry};
-    use crate::claude::load_agent_prompt;
+    use crate::claude_code::AiResponse;
     use crate::event_bus::EventBus;
     use crate::git::GitRepo;
     use anyhow::{Context, Result};
     use chrono::Utc;
     use std::path::{Path, PathBuf};
     use std::sync::Arc;
+    use thrum_core::agent::AgentId;
     use thrum_core::budget::{self, BudgetEntry, BudgetTracker, SessionType};
     use thrum_core::checkpoint::Checkpoint;
     use thrum_core::event::EventKind;
-    use thrum_core::gate::{run_gate, run_integration_gate_configured};
+    use thrum_core::gate::{run_gate, run_integration_gate_configured, run_security_checks};
     use thrum_core::repo::ReposConfig;
     use thrum_core::subsample::SubsampleConfig;
     use thrum_core::task::{CheckpointSummary, GateLevel, MAX_RETRIES, Task, TaskStatus};
+    use thrum_core::traceability::{TraceArtifact, TraceRecord};
     use thrum_db::checkpoint_store::CheckpointStore;
     use thrum_db::gate_store::GateStore;
     use thrum_db::session_store::SessionStore;
@@ -698,6 +1053,27 @@ pub mod pipeline {
             .any(|p| content_lower.contains(p))
     }
 
+    /// Threshold for "rapid failure" detection: if an agent exits with an
+    /// error in less than this many seconds, it likely hit an immediate
+    /// configuration or permission issue rather than doing real work.
+    const RAPID_FAILURE_THRESHOLD_SECS: u64 = 30;
+
+    /// Extra cooldown (seconds) applied when a rapid failure is detected.
+    const RAPID_FAILURE_COOLDOWN_SECS: u64 = 120;
+
+    /// Report whether an agent invocation counts as a "rapid failure".
+    ///
+    /// True only when the agent exited with a non-zero code, did not time
+    /// out, and ran for fewer than `RAPID_FAILURE_THRESHOLD_SECS` whole
+    /// seconds. A missing exit code is never treated as a failure here.
+    fn is_rapid_failure(result: &AiResponse, elapsed: &std::time::Duration) -> bool {
+        let exited_with_error = matches!(result.exit_code, Some(code) if code != 0);
+        if result.timed_out || !exited_with_error {
+            return false;
+        }
+        elapsed.as_secs() < RAPID_FAILURE_THRESHOLD_SECS
+    }
+
     /// Emit a task state change event.
     fn emit_state_change(event_bus: &EventBus, task: &Task, from: &str, to: &str) {
         event_bus.emit(EventKind::TaskStateChange {
@@ -786,6 +1162,187 @@ pub mod pipeline {
         );
     }
 
+    /// Store a trace record for `task`; insert failures are logged, never returned.
+    ///
+    /// The record is keyed by the task's `requirement_id`, falling back to a
+    /// generated `TASK-NNNN` id. That key is returned so callers can reuse it.
+    fn emit_trace(db: &redb::Database, task: &Task, artifact: TraceArtifact) -> String {
+        let req_id = match task.requirement_id.clone() {
+            Some(existing) => existing,
+            None => format!("TASK-{:04}", task.id.0),
+        };
+
+        let pending = TraceRecord {
+            id: 0, // auto-assigned by TraceStore
+            task_id: task.id.0,
+            requirement_id: req_id.clone(),
+            artifact,
+            created_at: Utc::now(),
+        };
+
+        let store = thrum_db::trace_store::TraceStore::new(db);
+        match store.insert(pending) {
+            Err(e) => {
+                tracing::warn!(
+                    task_id = %task.id,
+                    error = %e,
+                    "failed to create trace record (non-fatal)"
+                );
+            }
+            Ok(saved) => {
+                tracing::debug!(
+                    task_id = %task.id,
+                    trace_id = saved.id,
+                    "trace record created"
+                );
+            }
+        }
+        req_id
+    }
+
+    /// Handle a review timeout by applying the configured recovery strategy.
+    ///
+    /// Returns the review text the pipeline should continue with:
+    /// - `Skip`: a `[review-skipped-timeout]` note; no further agent call is made.
+    /// - `Retry`: one reduced-scope attempt (diff stats only, 180s timeout); its
+    ///   content on completion, or a skip note if that attempt also times out.
+    /// - `Extend` / `Fail`: whatever partial content `original_result` holds.
+    ///
+    /// Every path stores a `review_timeout` memory entry, including a successful
+    /// retry, so the original timeout stays observable. An error from the retry
+    /// invocation itself is propagated to the caller.
+    #[allow(clippy::too_many_arguments)]
+    async fn handle_review_timeout(
+        task: &Task,
+        event_bus: &EventBus,
+        task_store: &TaskStore<'_>,
+        repo_path: &std::path::Path,
+        reviewer_system: &str,
+        diff_stats: &str,
+        budget: &Arc<Mutex<BudgetTracker>>,
+        review_budget_usd: f64,
+        strategy: thrum_core::role::TimeoutRecoveryStrategy,
+        original_result: &AiResponse,
+    ) -> Result<String> {
+        tracing::warn!(
+            task_id = %task.id,
+            recovery = %strategy,
+            "review invocation timed out — applying recovery strategy"
+        );
+
+        match strategy {
+            thrum_core::role::TimeoutRecoveryStrategy::Skip => {
+                let skip_note = "[review-skipped-timeout] Review timed out. \
+                    Implementation passed Gate 1 (quality checks). \
+                    Proceeding without code review."
+                    .to_string();
+
+                emit_review_recovery(event_bus, task, "review-skipped-timeout");
+                store_timeout_memory(task_store, task, "skip");
+                Ok(skip_note)
+            }
+            thrum_core::role::TimeoutRecoveryStrategy::Retry => {
+                // Retry with reduced scope: stats only, no full diff patch
+                tracing::info!(
+                    task_id = %task.id,
+                    "retrying review with reduced scope (stats only)"
+                );
+                let reduced_prompt = format!(
+                    "{reviewer_system}\n\n---\n\n\
+                     Brief review of this change (stats only, full diff omitted due to size):\n\n\
+                     **Stats:** {diff_stats}\n\n\
+                     Please review based on the change summary above."
+                );
+
+                let retry_config = crate::claude_code::AgentConfig {
+                    prompt: reduced_prompt,
+                    cwd: repo_path.to_path_buf(),
+                    max_budget_usd: review_budget_usd,
+                    model: "claude-sonnet-4-6".into(),
+                    resume_session_id: None,
+                    agent: None,
+                    worktree: false,
+                    permission_mode: "default".into(),
+                    timeout_secs: 180,
+                };
+
+                let retry_agent_id = AgentId::generate(&task.repo, &task.id);
+                let retry_result = crate::claude_code::invoke_streaming(
+                    &retry_config,
+                    event_bus,
+                    &retry_agent_id,
+                    &task.id,
+                )
+                .await?;
+                record_invocation_cost(
+                    budget,
+                    task.id.0,
+                    SessionType::Review,
+                    &retry_result,
+                    review_budget_usd,
+                )
+                .await;
+
+                if retry_result.timed_out {
+                    tracing::warn!(
+                        task_id = %task.id,
+                        "reduced-scope review also timed out — skipping review"
+                    );
+                    emit_review_recovery(event_bus, task, "review-skipped-timeout-after-retry");
+                    store_timeout_memory(task_store, task, "retry-then-skip");
+                    Ok(
+                        "[review-skipped-timeout] Review timed out twice (full + reduced scope). \
+                         Implementation passed Gate 1. Proceeding without review."
+                            .to_string(),
+                    )
+                } else {
+                    // Record the original timeout even though the reduced-scope retry worked.
+                    emit_review_recovery(event_bus, task, "review-retried-reduced-scope");
+                    store_timeout_memory(task_store, task, "retry");
+                    Ok(retry_result.content)
+                }
+            }
+            thrum_core::role::TimeoutRecoveryStrategy::Extend
+            | thrum_core::role::TimeoutRecoveryStrategy::Fail => {
+                // Record as a distinct timeout event for observability
+                store_timeout_memory(task_store, task, &strategy.to_string());
+                // Use whatever partial content we got (may be empty)
+                Ok(original_result.content.clone())
+            }
+        }
+    }
+
+    /// Emit a reviewer `TimeoutRecovered` event (never with partial changes).
+    fn emit_review_recovery(event_bus: &EventBus, task: &Task, action: &'static str) {
+        event_bus.emit(EventKind::TimeoutRecovered {
+            task_id: task.id.clone(),
+            repo: task.repo.clone(),
+            role: "reviewer".into(),
+            recovery_action: action.into(),
+            had_partial_changes: false,
+        });
+    }
+
+    /// Persist a review timeout event as a distinct memory entry for observability.
+    ///
+    /// The entry uses `error_type: "review_timeout"` so it can be told apart from
+    /// other failures. Storing is best effort: a failure is logged, not returned.
+    fn store_timeout_memory(task_store: &TaskStore<'_>, task: &Task, strategy: &str) {
+        use thrum_core::memory::{MemoryCategory, MemoryEntry};
+        let category = MemoryCategory::Error {
+            error_type: "review_timeout".into(),
+        };
+        let summary = format!(
+            "Task '{}' review timed out (strategy={})",
+            task.title, strategy
+        );
+        let entry = MemoryEntry::new(task.id.clone(), task.repo.clone(), category, summary);
+        let memory_store = thrum_db::memory_store::MemoryStore::new(task_store.db());
+        if let Err(e) = memory_store.store(&entry) {
+            tracing::warn!(task_id = %task.id, error = %e, "review timeout memory not stored");
+        }
+    }
+
     /// Full pipeline: Pending/Claimed → Implement → Gate1 → Review → Gate2 → AwaitingApproval.
     ///
     /// When `roles` is provided, backend selection uses role→backend resolution
@@ -803,7 +1360,6 @@ pub mod pipeline {
         gate_store: &GateStore<'_>,
         repos_config: &ReposConfig,
         agents_dir: &Path,
-        registry: &BackendRegistry,
         roles: Option<&thrum_core::role::RolesConfig>,
         event_bus: &EventBus,
         budget: &Arc<Mutex<BudgetTracker>>,
@@ -823,25 +1379,18 @@ pub mod pipeline {
         };
         let repo_config = &repo_config;
 
-        // Role-aware backend selection: resolve implementer role → backend
-        let (agent, impl_role_name, impl_budget_usd) = if let Some(roles) = roles {
+        // Implementation budget from roles config (or default)
+        let impl_budget_usd = if let Some(roles) = roles {
             let impl_role = roles.implementer();
-            let backend = registry
-                .resolve_role(&impl_role)
-                .context("no backend available for implementer role")?;
-            let budget_usd = impl_role.budget_usd.unwrap_or(6.0);
-            (backend, impl_role.backend.clone(), budget_usd)
+            impl_role.budget_usd.unwrap_or(6.0)
         } else {
-            let backend = registry.agent().context("no agent backend available")?;
-            (backend, "default-agent".to_string(), 6.0)
+            6.0
         };
 
         tracing::info!(
             role = "implementer",
-            backend = agent.name(),
-            model = agent.model(),
-            role_backend = %impl_role_name,
-            "selected backend for implementation"
+            budget_usd = impl_budget_usd,
+            "preparing implementation via claude code"
         );
 
         // --- Budget check: ensure enough remaining before starting ---
@@ -859,6 +1408,33 @@ pub mod pipeline {
             }
         }
 
+        // --- Pre-dispatch audit: validate verification-tagged criteria ---
+        if !task.acceptance_criteria.is_empty() {
+            let audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+            if audit.passed {
+                // Populate tagged_criteria from the audit result
+                task.tagged_criteria = audit.tagged_criteria;
+                tracing::info!(
+                    task_id = %task.id,
+                    criteria_count = task.tagged_criteria.len(),
+                    "pre-dispatch audit passed — all criteria have verification tags"
+                );
+            } else {
+                // Auto-enrich: add suggested tags so the task can proceed
+                tracing::warn!(
+                    task_id = %task.id,
+                    feedback = ?audit.feedback,
+                    "pre-dispatch audit found untagged criteria — auto-enriching"
+                );
+                let enriched = thrum_core::verification::enrich_criteria(&task.acceptance_criteria);
+                task.acceptance_criteria = enriched;
+                let re_audit = thrum_core::verification::audit_criteria(&task.acceptance_criteria);
+                task.tagged_criteria = re_audit.tagged_criteria;
+            }
+            task.updated_at = Utc::now();
+            task_store.update(&task)?;
+        }
+
         // --- Implement ---
         let branch = task.branch_name();
         let prev_status = task.status.label().to_string();
@@ -870,13 +1446,74 @@ pub mod pipeline {
         task_store.update(&task)?;
         emit_state_change(event_bus, &task, &prev_status, "implementing");
 
+        // --- Trace: Requirement records ---
+        // When a spec exists, emit a trace record for each spec requirement.
+        // This feeds requirement IDs into the traceability chain.
+        if let Some(ref spec) = task.spec {
+            for req in &spec.requirements {
+                let trace_store = thrum_db::trace_store::TraceStore::new(task_store.db());
+                let record = TraceRecord {
+                    id: 0,
+                    task_id: task.id.0,
+                    requirement_id: req.id.clone(),
+                    artifact: TraceArtifact::Requirement {
+                        title: req.description.clone(),
+                        description: req.rationale.clone(),
+                    },
+                    created_at: Utc::now(),
+                };
+                if let Err(e) = trace_store.insert(record) {
+                    tracing::warn!(
+                        task_id = %task.id,
+                        req_id = %req.id,
+                        error = %e,
+                        "failed to create spec requirement trace record"
+                    );
+                }
+            }
+
+            // Emit design record from spec
+            emit_trace(
+                task_store.db(),
+                &task,
+                TraceArtifact::Design {
+                    rationale: spec.design.approach.clone(),
+                },
+            );
+        } else {
+            // Fallback: use task-level info as the requirement
+            emit_trace(
+                task_store.db(),
+                &task,
+                TraceArtifact::Requirement {
+                    title: task.title.clone(),
+                    description: task.description.clone(),
+                },
+            );
+
+            emit_trace(
+                task_store.db(),
+                &task,
+                TraceArtifact::Design {
+                    rationale: task.description.clone(),
+                },
+            );
+        }
+
         let git = GitRepo::open(&repo_config.path)?;
-        git.create_branch(&branch)?;
+        // When using a worktree, the branch was already created and checked
+        // out by create_branch_detached + git worktree add in the dispatch
+        // code. Calling create_branch here would fail because git refuses to
+        // force-update a branch that is the current HEAD of a worktree.
+        if work_dir.is_none() {
+            git.create_branch(&branch)?;
+        }
 
         let agent_file = agents_dir.join(format!("implementer_{}.md", task.repo));
-        let system_prompt = load_agent_prompt(&agent_file, repo_config.claude_md.as_deref())
-            .await
-            .unwrap_or_default();
+        let system_prompt =
+            crate::claude_code::load_agent_prompt(&agent_file, repo_config.claude_md.as_deref())
+                .await
+                .unwrap_or_default();
 
         // Inject relevant memories as context.
         // Touch accessed entries so frequently-used memories maintain higher
@@ -898,10 +1535,21 @@ pub mod pipeline {
             }
         };
 
-        let prompt = format!(
-            "{}{memory_context}",
-            build_implementation_prompt(&task, &branch)
-        );
+        let base_prompt = build_implementation_prompt(&task, &branch);
+        let containment_note = if work_dir.is_some() {
+            "\n\nIMPORTANT: You are running inside an isolated git worktree. \
+             Your current working directory IS the repo root — all files are here. \
+             Do NOT navigate to any other directory or absolute path. \
+             Stay in your current working directory for all operations.\
+             \n\nCRITICAL: Before you finish, you MUST commit your work with \
+             `git add -A && git commit -m \"your message\"`. \
+             If you do not commit, ALL your work will be lost. \
+             A pre-commit hook runs cargo fmt and clippy — if the commit is \
+             rejected, fix the issues and commit again. Do NOT use --no-verify."
+        } else {
+            ""
+        };
+        let prompt = format!("{base_prompt}{containment_note}{memory_context}");
 
         // Look up a previous session ID for session continuation on retries.
         // Only resume if the prior invocation was interrupted (timeout or error).
@@ -945,14 +1593,31 @@ pub mod pipeline {
             let _ = session_store.remove(&task.id);
         }
 
-        let mut request = AiRequest::new(&prompt)
-            .with_system(system_prompt)
-            .with_cwd(repo_config.path.clone());
-        if let Some(sid) = resume_sid {
-            request = request.with_resume_session(sid);
-        }
+        // Build the full prompt including system prompt
+        let full_prompt = if system_prompt.is_empty() {
+            prompt.clone()
+        } else {
+            format!("{system_prompt}\n\n---\n\n{prompt}")
+        };
 
-        let result = agent.invoke(&request).await?;
+        let agent_config = crate::claude_code::AgentConfig {
+            prompt: full_prompt,
+            cwd: repo_config.path.clone(),
+            max_budget_usd: impl_budget_usd,
+            model: "claude-opus-4-6".into(),
+            resume_session_id: resume_sid,
+            agent: None,
+            worktree: work_dir.is_some(),
+            permission_mode: "auto".into(),
+            timeout_secs: 1200,
+        };
+
+        let agent_id = AgentId::generate(&task.repo, &task.id);
+        let invoke_start = std::time::Instant::now();
+        let result =
+            crate::claude_code::invoke_streaming(&agent_config, event_bus, &agent_id, &task.id)
+                .await?;
+        let invoke_elapsed = invoke_start.elapsed();
 
         // Store the session ID for potential future retries (timeout/failure recovery).
         // This persists even if the invocation timed out — especially important then,
@@ -979,6 +1644,24 @@ pub mod pipeline {
                 exit_code = ?result.exit_code,
                 "implementation session had issues"
             );
+
+            // Persist implementation timeout as a distinct event for observability
+            if result.timed_out {
+                let mem = thrum_core::memory::MemoryEntry::new(
+                    task.id.clone(),
+                    task.repo.clone(),
+                    thrum_core::memory::MemoryCategory::Error {
+                        error_type: "implementation_timeout".into(),
+                    },
+                    format!(
+                        "Task '{}' implementation timed out (session_id={:?})",
+                        task.title,
+                        result.session_id.as_deref().unwrap_or("none"),
+                    ),
+                );
+                let memory_store = thrum_db::memory_store::MemoryStore::new(task_store.db());
+                let _ = memory_store.store(&mem);
+            }
         }
 
         // Detect API rate limit early. If the agent hit a usage limit, cool down
@@ -1004,18 +1687,40 @@ pub mod pipeline {
                 ),
             });
             tokio::time::sleep(std::time::Duration::from_secs(RATE_LIMIT_COOLDOWN_SECS)).await;
+        } else if is_rapid_failure(&result, &invoke_elapsed) {
+            tracing::warn!(
+                task_id = %task.id,
+                elapsed_secs = invoke_elapsed.as_secs(),
+                exit_code = ?result.exit_code,
+                "rapid failure detected (exited <{}s) — cooling down for {}s",
+                RAPID_FAILURE_THRESHOLD_SECS,
+                RAPID_FAILURE_COOLDOWN_SECS
+            );
+            event_bus.emit(EventKind::EngineLog {
+                level: thrum_core::event::LogLevel::Warn,
+                message: format!(
+                    "TASK-{:04} rapid failure (exit {:?} in {}s). \
+                     Cooling down {}s before continuing.",
+                    task.id.0,
+                    result.exit_code,
+                    invoke_elapsed.as_secs(),
+                    RAPID_FAILURE_COOLDOWN_SECS,
+                ),
+            });
+            tokio::time::sleep(std::time::Duration::from_secs(RAPID_FAILURE_COOLDOWN_SECS)).await;
         }
 
         // Salvage uncommitted partial work before checking for changes.
         // If the agent timed out or errored before committing, there may be
         // useful partial progress in the worktree. Committing it as WIP
         // preserves it on the branch so the next retry can continue.
-        let work_dir = repo_config.path.join(format!(
-            "worktrees/{}",
-            task.branch_name().replace('/', "_")
-        ));
-        if work_dir.exists() {
-            match crate::git::GitRepo::open(&work_dir) {
+        //
+        // Use repo_config.path directly: when a worktree is provided,
+        // with_work_dir() already set repo_config.path to the worktree path.
+        // Constructing a nested worktrees/ subpath would be wrong.
+        let effective_work_dir = repo_config.path.clone();
+        if effective_work_dir.exists() {
+            match crate::git::GitRepo::open(&effective_work_dir) {
                 Ok(g) => {
                     let reason = if result.timed_out {
                         "timed out".to_string()
@@ -1038,6 +1743,17 @@ pub mod pipeline {
                                     task.id.0, reason,
                                 ),
                             });
+                            // Emit a distinct timeout-recovery event when partial
+                            // work was preserved after an implementation timeout.
+                            if result.timed_out {
+                                event_bus.emit(EventKind::TimeoutRecovered {
+                                    task_id: task.id.clone(),
+                                    repo: task.repo.clone(),
+                                    role: "implementer".into(),
+                                    recovery_action: "salvaged-partial-work".into(),
+                                    had_partial_changes: true,
+                                });
+                            }
                         }
                         Ok(false) => {} // clean worktree, nothing to salvage
                         Err(e) => {
@@ -1067,12 +1783,12 @@ pub mod pipeline {
         // IMPORTANT: default to has_changes=true on ANY error. It's better to
         // run gates on unchanged code than to silently discard real agent work.
         // Git status can fail due to index lock contention between concurrent agents.
-        let work_dir = repo_config.path.join(format!(
-            "worktrees/{}",
-            task.branch_name().replace('/', "_")
-        ));
-        let has_changes = if work_dir.exists() {
-            match crate::git::GitRepo::open(&work_dir) {
+        //
+        // Use repo_config.path directly: when a worktree is provided,
+        // with_work_dir() already set repo_config.path to the worktree path.
+        let change_check_dir = repo_config.path.clone();
+        let has_changes = if change_check_dir.exists() {
+            match crate::git::GitRepo::open(&change_check_dir) {
                 Ok(g) => {
                     // Retry once after a short delay if git status fails
                     // (transient index lock from concurrent agents).
@@ -1085,35 +1801,74 @@ pub mod pipeline {
                         std::thread::sleep(std::time::Duration::from_secs(1));
                         g.is_clean()
                     });
-                    let dirty = match clean_result {
-                        Ok(clean) => !clean,
+                    let (dirty, dirty_from_failsafe) = match clean_result {
+                        Ok(clean) => (!clean, false),
                         Err(e) => {
                             tracing::error!(
                                 task_id = %task.id,
                                 error = %e,
                                 "git status failed twice — assuming dirty (fail-safe)"
                             );
-                            true // fail-safe: assume dirty
+                            (true, true) // fail-safe: assume dirty
                         }
                     };
-                    let commits = match g.has_commits_beyond_main() {
-                        Ok(v) => v,
+                    // Retry once after a short delay if commit check fails
+                    // (transient index lock from concurrent agents).
+                    let commits_result = g.has_commits_beyond_main().or_else(|e| {
+                        tracing::warn!(
+                            task_id = %task.id,
+                            error = %e,
+                            "has_commits_beyond_main failed, retrying after 1s (likely index lock)"
+                        );
+                        std::thread::sleep(std::time::Duration::from_secs(1));
+                        g.has_commits_beyond_main()
+                    });
+                    let (commits, commits_from_failsafe) = match commits_result {
+                        Ok(v) => (v, false),
                         Err(e) => {
-                            tracing::warn!(
+                            tracing::error!(
                                 task_id = %task.id,
                                 error = %e,
-                                "has_commits_beyond_main failed — ignoring (dirty check is primary)"
+                                "has_commits_beyond_main failed twice — assuming commits exist (fail-safe)"
                             );
-                            false
+                            (true, true) // fail-safe: assume commits exist rather than discard work
                         }
                     };
-                    dirty || commits
+
+                    let git_says_changes = dirty || commits;
+                    let any_git_error = dirty_from_failsafe || commits_from_failsafe;
+
+                    if git_says_changes {
+                        true
+                    } else if any_git_error {
+                        // Git operations errored — use filesystem fallback
+                        // since we can't trust the git result.
+                        let fs_has_changes =
+                            crate::git::has_modified_source_files(&change_check_dir);
+                        if fs_has_changes {
+                            tracing::warn!(
+                                task_id = %task.id,
+                                "git errored but filesystem has modified source files — preserving work (fail-safe)"
+                            );
+                        }
+                        fs_has_changes
+                    } else {
+                        // Both git checks succeeded and report no changes.
+                        // Trust the result — do NOT use filesystem fallback here
+                        // because worktree checkout sets all mtimes to "now",
+                        // which would always trigger a false positive.
+                        tracing::info!(
+                            task_id = %task.id,
+                            "git confirms no changes (clean worktree, 0 commits beyond main)"
+                        );
+                        false
+                    }
                 }
                 Err(e) => {
                     tracing::error!(
                         task_id = %task.id,
                         error = %e,
-                        work_dir = %work_dir.display(),
+                        work_dir = %change_check_dir.display(),
                         "failed to open worktree git repo — assuming has changes (fail-safe)"
                     );
                     true // fail-safe: assume changes exist
@@ -1129,33 +1884,52 @@ pub mod pipeline {
                     tracing::warn!(
                         task_id = %task.id,
                         error = %e,
-                        "branch_has_commits_beyond_main failed — assuming no changes"
+                        "branch_has_commits_beyond_main failed — assuming changes exist (fail-safe)"
                     );
-                    false
+                    true // fail-safe: don't discard work on git errors
                 }
             }
         };
 
+        // Emit a TimeoutRecovered event whenever the agent timed out but there
+        // ARE changes to proceed with (e.g. it committed before timing out).
+        // NOTE: this check does not exclude the salvage path above, so a timeout
+        // with salvaged work may emit both events (differing in recovery_action).
+        if result.timed_out && has_changes {
+            tracing::info!(
+                task_id = %task.id,
+                "implementation timed out but changes detected — proceeding to gates"
+            );
+            event_bus.emit(EventKind::TimeoutRecovered {
+                task_id: task.id.clone(),
+                repo: task.repo.clone(),
+                role: "implementer".into(),
+                recovery_action: "continued-with-partial-changes".into(),
+                had_partial_changes: true,
+            });
+        }
+
         if !has_changes {
             tracing::error!(
                 task_id = %task.id,
                 exit_code = ?result.exit_code,
+                timed_out = result.timed_out,
                 "implementation produced no changes — failing task"
             );
             emit_state_change(event_bus, &task, "implementing", "gate1_failed");
             let report = thrum_core::task::GateReport {
                 level: thrum_core::task::GateLevel::Quality,
-                checks: vec![thrum_core::task::CheckResult {
-                    name: "implementation_produced_changes".into(),
-                    passed: false,
-                    stdout: String::new(),
-                    stderr: format!(
-                        "Agent returned without making any changes (exit code: {:?}). \
+                checks: vec![thrum_core::task::CheckResult::simple(
+                    "implementation_produced_changes",
+                    false,
+                    "",
+                    format!(
+                        "Agent returned without making any changes (exit code: {:?}, timed_out: {}). \
                          This usually means the API rate limit was hit or the agent errored.",
-                        result.exit_code,
+                        result.exit_code, result.timed_out,
                     ),
-                    exit_code: result.exit_code.unwrap_or(-1),
-                }],
+                    result.exit_code.unwrap_or(-1),
+                )],
                 passed: false,
                 duration_secs: 0.0,
             };
@@ -1165,6 +1939,48 @@ pub mod pipeline {
             return Ok(());
         }
 
+        // --- Trace: Implementation record ---
+        {
+            let commit_sha = GitRepo::open(&repo_config.path)
+                .and_then(|g| g.head_sha())
+                .ok();
+            let files_changed = GitRepo::open(&repo_config.path)
+                .and_then(|g| g.changed_files_on_branch(&branch))
+                .unwrap_or_default();
+            emit_trace(
+                task_store.db(),
+                &task,
+                TraceArtifact::Implementation {
+                    branch: branch.clone(),
+                    commit_sha,
+                    files_changed,
+                },
+            );
+        }
+
+        // --- Spec Compliance Check (if spec exists) ---
+        if let Some(ref spec) = task.spec {
+            let files_changed = GitRepo::open(&repo_config.path)
+                .and_then(|g| g.changed_files_on_branch(&branch))
+                .unwrap_or_default();
+            let compliance = thrum_core::gate::run_spec_compliance_check(
+                spec,
+                &files_changed,
+                &repo_config.path,
+            );
+            tracing::info!(
+                task_id = %task.id,
+                passed = compliance.passed,
+                "spec compliance check: {}",
+                if compliance.passed { "passed" } else { "issues found" }
+            );
+            if !compliance.stdout.is_empty() {
+                for line in compliance.stdout.lines() {
+                    tracing::info!(task_id = %task.id, "  {}", line);
+                }
+            }
+        }
+
         // --- Gate 1: Quality ---
         let checkpoint_store = CheckpointStore::new(task_store.db());
         tracing::info!("running Gate 1: Quality");
@@ -1181,6 +1997,17 @@ pub mod pipeline {
             duration_secs: gate1.duration_secs,
         });
 
+        // --- Trace: Gate 1 Test record ---
+        emit_trace(
+            task_store.db(),
+            &task,
+            TraceArtifact::Test {
+                gate_level: "Quality".to_string(),
+                passed: gate1.passed,
+                report_json: serde_json::to_string(&gate1).unwrap_or_default(),
+            },
+        );
+
         if !gate1.passed {
             emit_state_change(event_bus, &task, "implementing", "gate1_failed");
             task.status = TaskStatus::Gate1Failed {
@@ -1228,6 +2055,32 @@ pub mod pipeline {
             return Ok(());
         }
 
+        // --- Map Gate 1 results to tagged criteria ---
+        if !task.tagged_criteria.is_empty() {
+            task.tagged_criteria =
+                thrum_core::verification::map_gate_results(&task.tagged_criteria, &gate1.checks);
+            let report = thrum_core::verification::VerificationReport::from_criteria(
+                task.id.0,
+                &task.tagged_criteria,
+            );
+            tracing::info!(
+                task_id = %task.id,
+                verified = report.verified_count,
+                failed = report.failed_count,
+                pending = report.pending_count,
+                total = report.total_count,
+                "mapped Gate 1 results to tagged criteria"
+            );
+            if report.has_failures() {
+                tracing::warn!(
+                    task_id = %task.id,
+                    "some tagged criteria failed verification at Gate 1"
+                );
+            }
+            task.updated_at = Utc::now();
+            task_store.update(&task)?;
+        }
+
         // --- Checkpoint: Gate 1 passed ---
         {
             let mut cp = Checkpoint::after_implementation(
@@ -1239,43 +2092,57 @@ pub mod pipeline {
             save_checkpoint(&checkpoint_store, event_bus, &cp);
         }
 
-        // --- Review (role-aware backend selection) ---
-        let (reviewer, review_budget_usd): (&dyn AiBackend, f64) = if let Some(roles) = roles {
+        // --- Review via Claude Code ---
+        let (review_budget_usd, review_timeout_recovery) = if let Some(roles) = roles {
             let rev_role = roles.reviewer();
-            let budget_usd = rev_role.budget_usd.unwrap_or(1.0);
-            let backend = registry
-                .resolve_role(&rev_role)
-                .or_else(|| registry.chat())
-                .or_else(|| registry.agent())
-                .context("no backend available for reviewer role")?;
-            (backend, budget_usd)
+            (
+                rev_role.budget_usd.unwrap_or(1.0),
+                rev_role.timeout_recovery,
+            )
         } else {
-            let backend = registry
-                .chat()
-                .or_else(|| registry.agent())
-                .context("no backend available for review")?;
-            (backend, 1.0)
+            (1.0, thrum_core::role::TimeoutRecoveryStrategy::Skip)
         };
 
         tracing::info!(
             role = "reviewer",
-            backend = reviewer.name(),
-            model = reviewer.model(),
-            "selected backend for review"
+            budget_usd = review_budget_usd,
+            timeout_recovery = %review_timeout_recovery,
+            "preparing review via claude code"
         );
 
         let reviewer_prompt_file = agents_dir.join("reviewer.md");
-        let reviewer_system = load_agent_prompt(&reviewer_prompt_file, None)
+        let reviewer_system = crate::claude_code::load_agent_prompt(&reviewer_prompt_file, None)
             .await
             .unwrap_or_default();
 
-        let diff = git.diff_summary().unwrap_or_default();
-        let review_request = AiRequest::new(format!(
-            "Review this change for correctness, proof obligations, and style:\n\n{diff}"
-        ))
-        .with_system(reviewer_system);
+        let diff_patch = git.diff_patch_for_branch(&branch).unwrap_or_default();
+        let diff_stats = git.diff_summary_for_branch(&branch).unwrap_or_default();
+        let review_prompt = format!(
+            "{reviewer_system}\n\n---\n\n\
+             Review this change for correctness, proof obligations, and style:\n\n\
+             **Stats:** {diff_stats}\n\n```diff\n{diff_patch}\n```"
+        );
 
-        let review_result = reviewer.invoke(&review_request).await?;
+        let review_config = crate::claude_code::AgentConfig {
+            prompt: review_prompt,
+            cwd: repo_config.path.clone(),
+            max_budget_usd: review_budget_usd,
+            model: "claude-sonnet-4-6".into(),
+            resume_session_id: None,
+            agent: None,
+            worktree: false,
+            permission_mode: "default".into(),
+            timeout_secs: 300,
+        };
+
+        let review_agent_id = AgentId::generate(&task.repo, &task.id);
+        let review_result = crate::claude_code::invoke_streaming(
+            &review_config,
+            event_bus,
+            &review_agent_id,
+            &task.id,
+        )
+        .await?;
 
         // Record review cost
         record_invocation_cost(
@@ -1287,9 +2154,39 @@ pub mod pipeline {
         )
         .await;
 
+        // --- Handle review timeout with configurable recovery ---
+        let review_content = if review_result.timed_out {
+            handle_review_timeout(
+                &task,
+                event_bus,
+                task_store,
+                &repo_config.path,
+                &reviewer_system,
+                &diff_stats,
+                budget,
+                review_budget_usd,
+                review_timeout_recovery,
+                &review_result,
+            )
+            .await?
+        } else {
+            review_result.content
+        };
+
+        // --- Trace: Review record ---
+        emit_trace(
+            task_store.db(),
+            &task,
+            TraceArtifact::Review {
+                reviewer: "claude-code".to_string(),
+                approved: true, // NOTE(review): hard-coded; reviewer verdict is not parsed here
+                comments: review_content.clone(),
+            },
+        );
+
         emit_state_change(event_bus, &task, "implementing", "reviewing");
         task.status = TaskStatus::Reviewing {
-            reviewer_output: review_result.content.clone(),
+            reviewer_output: review_content.clone(),
         };
         task.updated_at = Utc::now();
         task_store.update(&task)?;
@@ -1299,7 +2196,7 @@ pub mod pipeline {
             let cp_store = CheckpointStore::new(task_store.db());
             match cp_store.get(&task.id) {
                 Ok(Some(mut cp)) => {
-                    cp.advance_to_review(review_result.content.clone());
+                    cp.advance_to_review(review_content.clone());
                     save_checkpoint(&cp_store, event_bus, &cp);
                 }
                 _ => {
@@ -1310,7 +2207,7 @@ pub mod pipeline {
                         branch.clone(),
                     );
                     cp.advance_to_gate1(gate1.clone());
-                    cp.advance_to_review(review_result.content.clone());
+                    cp.advance_to_review(review_content.clone());
                     save_checkpoint(&cp_store, event_bus, &cp);
                 }
             }
@@ -1322,7 +2219,36 @@ pub mod pipeline {
             task_id: task.id.clone(),
             level: GateLevel::Proof,
         });
-        let gate2 = run_gate(&GateLevel::Proof, repo_config, subsample, Some(task.id.0))?;
+        let mut gate2 = run_gate(&GateLevel::Proof, repo_config, subsample, Some(task.id.0))?;
+
+        // If the task has a spec with proof obligations, run spec-driven proof checks
+        // and merge them into the gate report.
+        if let Some(ref spec) = task.spec
+            && !spec.proof_obligations.is_empty()
+        {
+            tracing::info!(
+                task_id = %task.id,
+                obligations = spec.proof_obligations.len(),
+                "running spec-driven proof obligation checks"
+            );
+            match thrum_core::gate::run_spec_proof_checks(spec, repo_config) {
+                Ok(spec_checks) => {
+                    let spec_failed = spec_checks.iter().any(|c| !c.passed);
+                    gate2.checks.extend(spec_checks);
+                    if spec_failed {
+                        gate2.passed = false;
+                    }
+                }
+                Err(e) => {
+                    tracing::warn!(
+                        task_id = %task.id,
+                        error = %e,
+                        "failed to run spec proof checks"
+                    );
+                }
+            }
+        }
+
         gate_store.store(&task.id, &gate2)?;
         event_bus.emit(EventKind::GateFinished {
             task_id: task.id.clone(),
@@ -1331,6 +2257,37 @@ pub mod pipeline {
             duration_secs: gate2.duration_secs,
         });
 
+        // --- Trace: Gate 2 Test record ---
+        emit_trace(
+            task_store.db(),
+            &task,
+            TraceArtifact::Test {
+                gate_level: "Proof".to_string(),
+                passed: gate2.passed,
+                report_json: serde_json::to_string(&gate2).unwrap_or_default(),
+            },
+        );
+
+        // --- Trace: Proof records for Z3/Rocq checks ---
+        for check in &gate2.checks {
+            let prover = if check.name.contains("z3") {
+                "z3"
+            } else if check.name.contains("rocq") || check.name.contains("coq") {
+                "rocq"
+            } else {
+                continue;
+            };
+            emit_trace(
+                task_store.db(),
+                &task,
+                TraceArtifact::Proof {
+                    prover: prover.to_string(),
+                    passed: check.passed,
+                    report_json: serde_json::to_string(check).unwrap_or_default(),
+                },
+            );
+        }
+
         if !gate2.passed {
             emit_state_change(event_bus, &task, "reviewing", "gate2_failed");
             task.status = TaskStatus::Gate2Failed {
@@ -1374,6 +2331,32 @@ pub mod pipeline {
             return Ok(());
         }
 
+        // --- Map Gate 2 results to tagged criteria ---
+        if !task.tagged_criteria.is_empty() {
+            task.tagged_criteria =
+                thrum_core::verification::map_gate_results(&task.tagged_criteria, &gate2.checks);
+            let report = thrum_core::verification::VerificationReport::from_criteria(
+                task.id.0,
+                &task.tagged_criteria,
+            );
+            tracing::info!(
+                task_id = %task.id,
+                verified = report.verified_count,
+                failed = report.failed_count,
+                pending = report.pending_count,
+                total = report.total_count,
+                "mapped Gate 2 results to tagged criteria"
+            );
+            if report.all_verified() {
+                tracing::info!(
+                    task_id = %task.id,
+                    "all tagged criteria verified after Gate 2"
+                );
+            }
+            task.updated_at = Utc::now();
+            task_store.update(&task)?;
+        }
+
         // --- Checkpoint: Gate 2 passed ---
         {
             let cp_store = CheckpointStore::new(task_store.db());
@@ -1389,19 +2372,54 @@ pub mod pipeline {
                         branch.clone(),
                     );
                     cp.advance_to_gate1(gate1.clone());
-                    cp.advance_to_review(review_result.content.clone());
+                    cp.advance_to_review(review_content.clone());
                     cp.advance_to_gate2(gate2.clone());
                     save_checkpoint(&cp_store, event_bus, &cp);
                 }
             }
         }
 
+        // --- Trust Boundary Assessment ---
+        let trust_assessment = if let Some(ref trust_config) = repo_config.trust {
+            let changed_files: Vec<String> = diff_stats
+                .lines()
+                .filter(|l| !l.trim().is_empty())
+                .map(|l| l.trim().to_string())
+                .collect();
+            let assessment = trust_config.assess(&changed_files);
+
+            if assessment.requires_human_review {
+                tracing::warn!(
+                    task_id = %task.id,
+                    overall_risk = %assessment.overall_risk,
+                    "HIGH-RISK files changed — auto-approval blocked"
+                );
+            }
+            if assessment.triggers_security_checks {
+                tracing::info!(task_id = %task.id, "running security checks for sensitive changes");
+                if let Ok(sec_checks) = run_security_checks(repo_config) {
+                    for check in &sec_checks {
+                        if !check.passed {
+                            tracing::warn!(
+                                check = %check.name,
+                                "security check failed (advisory)"
+                            );
+                        }
+                    }
+                }
+            }
+            Some(assessment)
+        } else {
+            None
+        };
+
         // --- Await Human Approval ---
         let summary = CheckpointSummary {
-            diff_summary: diff,
-            reviewer_output: review_result.content,
+            diff_summary: diff_stats,
+            reviewer_output: review_content,
             gate1_report: gate1,
             gate2_report: Some(gate2),
+            trust_assessment,
         };
         emit_state_change(event_bus, &task, "reviewing", "awaiting_approval");
         task.status = TaskStatus::AwaitingApproval { summary };
@@ -1487,13 +2505,13 @@ pub mod pipeline {
             tracing::info!("no integration steps configured, Gate 3 passes vacuously");
             thrum_core::task::GateReport {
                 level: GateLevel::Integration,
-                checks: vec![thrum_core::task::CheckResult {
-                    name: "no_integration_steps".into(),
-                    passed: true,
-                    stdout: "No integration steps configured for this pipeline".into(),
-                    stderr: String::new(),
-                    exit_code: 0,
-                }],
+                checks: vec![thrum_core::task::CheckResult::simple(
+                    "no_integration_steps",
+                    true,
+                    "No integration steps configured for this pipeline",
+                    "",
+                    0,
+                )],
                 passed: true,
                 duration_secs: 0.0,
             }
@@ -1513,6 +2531,17 @@ pub mod pipeline {
             duration_secs: gate3.duration_secs,
         });
 
+        // --- Trace: Gate 3 Test record ---
+        emit_trace(
+            task_store.db(),
+            &task,
+            TraceArtifact::Test {
+                gate_level: "Integration".to_string(),
+                passed: gate3.passed,
+                report_json: serde_json::to_string(&gate3).unwrap_or_default(),
+            },
+        );
+
         if !gate3.passed {
             emit_state_change(event_bus, &task, "integrating", "gate3_failed");
             task.status = TaskStatus::Gate3Failed { report: gate3 };
@@ -1522,31 +2551,76 @@ pub mod pipeline {
             return Ok(());
         }
 
-        // --- Merge ---
+        // --- CI or local merge ---
         let branch = task.branch_name();
-        tracing::info!(branch = %branch, "merging branch to main");
-        let git = GitRepo::open(&repo_config.path)?;
-        let commit_sha = git
-            .merge_branch_to_main(&branch)
-            .context("failed to merge branch")?;
 
-        emit_state_change(event_bus, &task, "integrating", "merged");
-        task.status = TaskStatus::Merged {
-            commit_sha: commit_sha.clone(),
-        };
-        task.updated_at = Utc::now();
-        task_store.update(&task)?;
+        // Check if CI integration is configured for this repo
+        let ci_enabled = base_repo_config.ci.as_ref().is_some_and(|ci| ci.enabled);
 
-        // Clean up any stale checkpoint and session for this task
-        let checkpoint_store = CheckpointStore::new(task_store.db());
-        remove_checkpoint(&checkpoint_store, &task);
-        let _ = SessionStore::new(task_store.db()).remove(&task.id);
+        if ci_enabled {
+            // Push branch and create PR, then transition to AwaitingCI
+            tracing::info!(
+                task_id = %task.id,
+                branch = %branch,
+                "CI integration enabled — pushing branch and creating PR"
+            );
 
-        tracing::info!(
-            task_id = %task.id,
-            commit = %commit_sha,
-            "task merged successfully"
-        );
+            crate::ci::push_branch(&repo_config.path, &branch)
+                .context("failed to push branch to remote")?;
+
+            let pr_title = format!("[thrum] {}", task.title);
+            let pr_body = format!(
+                "## {}\n\n{}\n\n---\n*Created by thrum ({}).*",
+                task.title, task.description, task.id
+            );
+
+            let (pr_number, pr_url) =
+                crate::ci::create_pr(&repo_config.path, &branch, &pr_title, &pr_body)
+                    .context("failed to create PR")?;
+
+            emit_state_change(event_bus, &task, "integrating", "awaiting-ci");
+            task.status = TaskStatus::AwaitingCI {
+                pr_number,
+                pr_url: pr_url.clone(),
+                branch: branch.clone(),
+                started_at: Utc::now(),
+                ci_attempts: 0,
+            };
+            task.updated_at = Utc::now();
+            task_store.update(&task)?;
+
+            tracing::info!(
+                task_id = %task.id,
+                pr_number,
+                pr_url = %pr_url,
+                "PR created, transitioning to AwaitingCI"
+            );
+        } else {
+            // Local merge (original behavior)
+            tracing::info!(branch = %branch, "merging branch to main");
+            let git = GitRepo::open(&repo_config.path)?;
+            let commit_sha = git
+                .merge_branch_to_main(&branch)
+                .context("failed to merge branch")?;
+
+            emit_state_change(event_bus, &task, "integrating", "merged");
+            task.status = TaskStatus::Merged {
+                commit_sha: commit_sha.clone(),
+            };
+            task.updated_at = Utc::now();
+            task_store.update(&task)?;
+
+            // Clean up any stale checkpoint and session for this task
+            let checkpoint_store = CheckpointStore::new(task_store.db());
+            remove_checkpoint(&checkpoint_store, &task);
+            let _ = SessionStore::new(task_store.db()).remove(&task.id);
+
+            tracing::info!(
+                task_id = %task.id,
+                commit = %commit_sha,
+                "task merged successfully"
+            );
+        }
 
         Ok(())
     }
@@ -1568,7 +2642,6 @@ pub mod pipeline {
         gate_store: &GateStore<'_>,
         repos_config: &ReposConfig,
         agents_dir: &Path,
-        registry: &BackendRegistry,
         event_bus: &EventBus,
         budget: &Arc<Mutex<BudgetTracker>>,
         subsample: Option<&SubsampleConfig>,
@@ -1658,33 +2731,39 @@ pub mod pipeline {
         // Exponential backoff: wait before retrying to avoid rapid churn.
         // This prevents burning through all retries in seconds when hitting
         // rate limits or transient API errors.
+        //
+        // NOTE: retry_count has NOT been incremented yet at this point, so
+        // we use (retry_count + 1) as the index. Index 0 in the schedule
+        // represents the initial run (no backoff), while every retry gets
+        // at least the minimum delay.
         {
-            let backoff_secs = RETRY_BACKOFF_SECS[task.retry_count.min(3) as usize];
-            if backoff_secs > 0 {
-                tracing::info!(
-                    task_id = %task.id,
-                    retry = task.retry_count,
-                    backoff_secs,
-                    "applying exponential backoff before retry"
-                );
-                event_bus.emit(EventKind::EngineLog {
-                    level: thrum_core::event::LogLevel::Info,
-                    message: format!(
-                        "TASK-{:04} retry {}/{}: backing off {}s before next attempt",
-                        task.id.0, task.retry_count, MAX_RETRIES, backoff_secs
-                    ),
-                });
-                tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
-            }
+            let backoff_idx = (task.retry_count + 1).min(3) as usize;
+            let backoff_secs = RETRY_BACKOFF_SECS[backoff_idx];
+            tracing::info!(
+                task_id = %task.id,
+                retry = task.retry_count + 1,
+                backoff_secs,
+                "applying exponential backoff before retry"
+            );
+            event_bus.emit(EventKind::EngineLog {
+                level: thrum_core::event::LogLevel::Info,
+                message: format!(
+                    "TASK-{:04} retry {}/{}: backing off {}s before next attempt",
+                    task.id.0,
+                    task.retry_count + 1,
+                    MAX_RETRIES,
+                    backoff_secs
+                ),
+            });
+            tokio::time::sleep(std::time::Duration::from_secs(backoff_secs)).await;
         }
 
-        // Query failure-specific memories for context-aware retries.
-        // These are error-category memories from the same repo, surfacing
-        // patterns like "cargo fmt failed" or "proof obligation missing"
-        // that help the agent avoid repeating past mistakes.
+        // Query failure-specific memories for THIS task only.
+        // Previously this queried all errors for the entire repo, which
+        // injected error context from unrelated tasks and confused agents.
         let failure_memories = {
             let memory_store = thrum_db::memory_store::MemoryStore::new(task_store.db());
-            match memory_store.query_errors_for_repo(&task.repo, 5) {
+            match memory_store.query_errors_for_task(&task.id, 5) {
                 Ok(memories) if !memories.is_empty() => {
                     // Touch accessed memories to maintain their relevance
                     let ids: Vec<_> = memories.iter().map(|m| m.id.clone()).collect();
@@ -1700,14 +2779,27 @@ pub mod pipeline {
             }
         };
 
+        // Preserve the current gate report in history before resetting status.
+        if let Some(report) = task.status.gate_report() {
+            task.gate_history.push(report.clone());
+        }
+
         task.retry_count += 1;
         task.status = TaskStatus::Pending;
         task.updated_at = Utc::now();
         task_store.update(&task)?;
 
-        let original_desc = task.description.clone();
+        // Only keep the ORIGINAL description (before any retry blocks were appended).
+        // Accumulating retry context across 10+ retries makes prompts enormous and
+        // causes agents to time out before writing any code.
+        let base_desc = task
+            .description
+            .split("\n\n---\n**RETRY ")
+            .next()
+            .unwrap_or(&task.description)
+            .to_string();
         task.description = format!(
-            "{original_desc}\n\n---\n**RETRY {}/{} [strategy: {}]** — Previous attempt failed:\n\
+            "{base_desc}\n\n---\n**RETRY {}/{} [strategy: {}]** — Previous attempt failed:\n\
              {feedback}{failure_memories}{convergence_prompt}",
             task.retry_count,
             MAX_RETRIES,
@@ -1720,7 +2812,6 @@ pub mod pipeline {
             gate_store,
             repos_config,
             agents_dir,
-            registry,
             None,
             event_bus,
             budget,
@@ -1745,7 +2836,6 @@ pub mod pipeline {
         gate_store: &GateStore<'_>,
         repos_config: &ReposConfig,
         agents_dir: &Path,
-        registry: &BackendRegistry,
         roles: Option<&thrum_core::role::RolesConfig>,
         event_bus: &EventBus,
         budget: &Arc<Mutex<BudgetTracker>>,
@@ -1788,9 +2878,14 @@ pub mod pipeline {
         task_store.update(&task)?;
         emit_state_change(event_bus, &task, &prev_status, "implementing (resumed)");
 
-        // Verify the branch still exists
+        // Verify the branch still exists.
+        // When using a worktree, the branch is already checked out there;
+        // calling create_branch would fail because git refuses to force-update
+        // a branch that is the current HEAD of a worktree.
         let git = GitRepo::open(&repo_config.path)?;
-        if let Err(e) = git.create_branch(&branch) {
+        if work_dir.is_none()
+            && let Err(e) = git.create_branch(&branch)
+        {
             tracing::debug!(
                 error = %e,
                 "branch already exists (expected for resume)"
@@ -1848,36 +2943,51 @@ pub mod pipeline {
             );
             checkpoint.reviewer_output.clone().unwrap_or_default()
         } else {
-            // Run review
-            let (reviewer, review_budget_usd): (&dyn AiBackend, f64) = if let Some(roles) = roles {
+            // Run review via Claude Code
+            let (review_budget_usd, review_timeout_recovery) = if let Some(roles) = roles {
                 let rev_role = roles.reviewer();
-                let budget_usd = rev_role.budget_usd.unwrap_or(1.0);
-                let backend = registry
-                    .resolve_role(&rev_role)
-                    .or_else(|| registry.chat())
-                    .or_else(|| registry.agent())
-                    .context("no backend available for reviewer role")?;
-                (backend, budget_usd)
+                (
+                    rev_role.budget_usd.unwrap_or(1.0),
+                    rev_role.timeout_recovery,
+                )
             } else {
-                let backend = registry
-                    .chat()
-                    .or_else(|| registry.agent())
-                    .context("no backend available for review")?;
-                (backend, 1.0)
+                (1.0, thrum_core::role::TimeoutRecoveryStrategy::Skip)
             };
 
             let reviewer_prompt_file = agents_dir.join("reviewer.md");
-            let reviewer_system = load_agent_prompt(&reviewer_prompt_file, None)
-                .await
-                .unwrap_or_default();
+            let reviewer_system =
+                crate::claude_code::load_agent_prompt(&reviewer_prompt_file, None)
+                    .await
+                    .unwrap_or_default();
+
+            let diff_patch = git.diff_patch_for_branch(&branch).unwrap_or_default();
+            let diff_stats = git.diff_summary_for_branch(&branch).unwrap_or_default();
+            let review_prompt = format!(
+                "{reviewer_system}\n\n---\n\n\
+                 Review this change for correctness, proof obligations, and style:\n\n\
+                 **Stats:** {diff_stats}\n\n```diff\n{diff_patch}\n```"
+            );
 
-            let diff = git.diff_summary().unwrap_or_default();
-            let review_request = AiRequest::new(format!(
-                "Review this change for correctness, proof obligations, and style:\n\n{diff}"
-            ))
-            .with_system(reviewer_system);
+            let review_config = crate::claude_code::AgentConfig {
+                prompt: review_prompt,
+                cwd: repo_config.path.clone(),
+                max_budget_usd: review_budget_usd,
+                model: "claude-sonnet-4-6".into(),
+                resume_session_id: None,
+                agent: None,
+                worktree: false,
+                permission_mode: "default".into(),
+                timeout_secs: 300,
+            };
 
-            let review_result = reviewer.invoke(&review_request).await?;
+            let review_agent_id = AgentId::generate(&task.repo, &task.id);
+            let review_result = crate::claude_code::invoke_streaming(
+                &review_config,
+                event_bus,
+                &review_agent_id,
+                &task.id,
+            )
+            .await?;
             record_invocation_cost(
                 budget,
                 task.id.0,
@@ -1887,9 +2997,28 @@ pub mod pipeline {
             )
             .await;
 
+            // Handle review timeout with recovery strategy
+            let review_content = if review_result.timed_out {
+                handle_review_timeout(
+                    &task,
+                    event_bus,
+                    task_store,
+                    &repo_config.path,
+                    &reviewer_system,
+                    &diff_stats,
+                    budget,
+                    review_budget_usd,
+                    review_timeout_recovery,
+                    &review_result,
+                )
+                .await?
+            } else {
+                review_result.content
+            };
+
             emit_state_change(event_bus, &task, "implementing", "reviewing");
             task.status = TaskStatus::Reviewing {
-                reviewer_output: review_result.content.clone(),
+                reviewer_output: review_content.clone(),
             };
             task.updated_at = Utc::now();
             task_store.update(&task)?;
@@ -1898,12 +3027,12 @@ pub mod pipeline {
             {
                 let cp_store = CheckpointStore::new(task_store.db());
                 if let Ok(Some(mut cp)) = cp_store.get(&task.id) {
-                    cp.advance_to_review(review_result.content.clone());
+                    cp.advance_to_review(review_content.clone());
                     save_checkpoint(&cp_store, event_bus, &cp);
                 }
             }
 
-            review_result.content
+            review_content
         };
 
         let gate2_report = if checkpoint.gate2_passed() {
@@ -1950,12 +3079,44 @@ pub mod pipeline {
         };
 
         // --- AwaitingApproval ---
-        let diff = git.diff_summary().unwrap_or_default();
+        let diff_stats = git.diff_summary_for_branch(&branch).unwrap_or_default();
+
+        // Trust boundary assessment for resumed pipeline
+        let trust_assessment = if let Some(ref trust_config) = repo_config.trust {
+            let changed_files: Vec<String> = diff_stats
+                .lines()
+                .filter(|l| !l.trim().is_empty())
+                .map(|l| l.trim().to_string())
+                .collect();
+            let assessment = trust_config.assess(&changed_files);
+            if assessment.requires_human_review {
+                tracing::warn!(
+                    task_id = %task.id,
+                    overall_risk = %assessment.overall_risk,
+                    "HIGH-RISK files changed — auto-approval blocked"
+                );
+            }
+            if assessment.triggers_security_checks {
+                tracing::info!(task_id = %task.id, "running security checks for sensitive changes");
+                if let Ok(sec_checks) = run_security_checks(repo_config) {
+                    for check in &sec_checks {
+                        if !check.passed {
+                            tracing::warn!(check = %check.name, "security check failed (advisory)");
+                        }
+                    }
+                }
+            }
+            Some(assessment)
+        } else {
+            None
+        };
+
         let summary = CheckpointSummary {
-            diff_summary: diff,
+            diff_summary: diff_stats,
             reviewer_output,
             gate1_report,
             gate2_report,
+            trust_assessment,
         };
         emit_state_change(event_bus, &task, "reviewing", "awaiting_approval");
         task.status = TaskStatus::AwaitingApproval { summary };
@@ -2222,10 +3383,570 @@ pub mod pipeline {
 
         #[test]
         fn backoff_schedule() {
-            assert_eq!(RETRY_BACKOFF_SECS[0], 0); // initial (no backoff)
+            assert_eq!(RETRY_BACKOFF_SECS[0], 0); // initial run (no backoff)
             assert_eq!(RETRY_BACKOFF_SECS[1], 30); // first retry
             assert_eq!(RETRY_BACKOFF_SECS[2], 120); // second retry
-            assert_eq!(RETRY_BACKOFF_SECS[3], 300); // third retry
+            assert_eq!(RETRY_BACKOFF_SECS[3], 300); // third+ retry
+        }
+
+        /// Verify the retry-count-to-backoff mapping used in retry_task_pipeline.
+        ///
+        /// The delay is looked up before retry_count is incremented, so the
+        /// table index is (retry_count + 1), clamped to slot 3. Every retry
+        /// therefore waits at least the minimum delay (30s).
+        #[test]
+        fn backoff_index_maps_retry_count_correctly() {
+            // Index expression under test: clamp (retries + 1) to slot 3.
+            let delay_after = |retries: u32| -> u64 {
+                RETRY_BACKOFF_SECS[(retries + 1).min(3) as usize]
+            };
+            let expectations = [(0, 30), (1, 120), (2, 300), (9, 300)];
+            for (retries, expected_secs) in expectations {
+                assert_eq!(delay_after(retries), expected_secs);
+            }
+        }
+
+        // --- Rapid failure detection tests ---
+
+        #[test]
+        fn rapid_failure_fast_exit_with_error() {
+            let r = make_response(Some(1), "Error", false);
+            let elapsed = std::time::Duration::from_secs(5);
+            assert!(is_rapid_failure(&r, &elapsed));
+        }
+
+        #[test]
+        fn rapid_failure_not_triggered_on_slow_exit() {
+            let r = make_response(Some(1), "Error", false);
+            let elapsed = std::time::Duration::from_secs(60);
+            assert!(!is_rapid_failure(&r, &elapsed));
+        }
+
+        #[test]
+        fn rapid_failure_not_triggered_on_success() {
+            let r = make_response(Some(0), "Done", false);
+            let elapsed = std::time::Duration::from_secs(5);
+            assert!(!is_rapid_failure(&r, &elapsed));
+        }
+
+        #[test]
+        fn rapid_failure_not_triggered_on_timeout() {
+            let r = make_response(Some(-1), "", true);
+            let elapsed = std::time::Duration::from_secs(5);
+            assert!(!is_rapid_failure(&r, &elapsed));
+        }
+
+        #[test]
+        fn rapid_failure_boundary_at_threshold() {
+            let r = make_response(Some(1), "Error", false);
+            let at = std::time::Duration::from_secs(RAPID_FAILURE_THRESHOLD_SECS);
+            assert!(!is_rapid_failure(&r, &at));
+            let below = std::time::Duration::from_secs(RAPID_FAILURE_THRESHOLD_SECS - 1);
+            assert!(is_rapid_failure(&r, &below));
+        }
+    }
+
+    // --- RepoCooldownTracker tests (async) ---
+
+    #[cfg(test)]
+    mod cooldown_tests {
+        use super::super::RepoCooldownTracker;
+
+        #[tokio::test]
+        async fn repo_cooldown_no_cooldown_initially() {
+            let tracker = RepoCooldownTracker::new();
+            assert!(tracker.check_cooldown("test-repo").await.is_none());
+        }
+
+        #[tokio::test]
+        async fn repo_cooldown_active_after_failure() {
+            let tracker = RepoCooldownTracker::new();
+            tracker.record_failure("test-repo").await;
+            let remaining = tracker.check_cooldown("test-repo").await;
+            assert!(remaining.is_some());
+            assert!(remaining.unwrap() > 0);
+        }
+
+        #[tokio::test]
+        async fn repo_cooldown_cleared_on_success() {
+            let tracker = RepoCooldownTracker::new();
+            tracker.record_failure("test-repo").await;
+            assert!(tracker.check_cooldown("test-repo").await.is_some());
+            tracker.record_success("test-repo").await;
+            assert!(tracker.check_cooldown("test-repo").await.is_none());
+        }
+
+        #[tokio::test]
+        async fn repo_cooldown_independent_per_repo() {
+            let tracker = RepoCooldownTracker::new();
+            tracker.record_failure("repo-a").await;
+            assert!(tracker.check_cooldown("repo-a").await.is_some());
+            assert!(tracker.check_cooldown("repo-b").await.is_none());
+        }
+
+        #[tokio::test]
+        async fn repo_cooldown_consecutive_failures_scale() {
+            let tracker = RepoCooldownTracker::new();
+
+            tracker.record_failure("test-repo").await;
+            let r1 = tracker.check_cooldown("test-repo").await.unwrap();
+
+            tracker.record_failure("test-repo").await;
+            let r2 = tracker.check_cooldown("test-repo").await.unwrap();
+
+            tracker.record_failure("test-repo").await;
+            let r3 = tracker.check_cooldown("test-repo").await.unwrap();
+
+            // 4th failure: clamped to 3 * base (not 4 * base)
+            tracker.record_failure("test-repo").await;
+            let r4 = tracker.check_cooldown("test-repo").await.unwrap();
+
+            assert!(r1 <= r2, "r1={r1} should be <= r2={r2}");
+            assert!(r2 <= r3, "r2={r2} should be <= r3={r3}");
+            assert!(
+                (r3 as i64 - r4 as i64).unsigned_abs() <= 1,
+                "r3={r3} should be approx r4={r4} (clamped at 3)"
+            );
+        }
+    }
+
+    #[cfg(test)]
+    mod spec_tests {
+        use super::build_implementation_prompt;
+
+        /// Planner produces a Spec stored in task metadata.
+        #[test]
+        fn planner_task_with_spec_stores_spec_on_task() {
+            use thrum_core::spec::{DesignSpec, Priority, Spec, SpecRequirement};
+            use thrum_core::task::{RepoName, Task};
+
+            let spec = Spec {
+                title: "Add feature X".into(),
+                context: "Business requirement".into(),
+                requirements: vec![SpecRequirement {
+                    id: "REQ-001".into(),
+                    description: "Feature X must work".into(),
+                    rationale: "Customer request".into(),
+                    priority: Priority::P1,
+                    safety_relevance: None,
+                }],
+                design: DesignSpec {
+                    approach: "Modify module Y".into(),
+                    affected_files: vec!["src/y.rs".into()],
+                    ..Default::default()
+                },
+                acceptance_criteria: vec!["Feature X works (TEST)".into()],
+                proof_obligations: Vec::new(),
+                test_plan: Vec::new(),
+            };
+
+            let mut task = Task::new(RepoName::new("test"), "Add X".into(), "desc".into());
+            task.spec = Some(spec.clone());
+            task.acceptance_criteria = spec.tagged_acceptance_criteria();
+
+            assert!(task.spec.is_some());
+            assert_eq!(task.spec.as_ref().unwrap().title, "Add feature X");
+            assert!(!task.acceptance_criteria.is_empty());
+        }
+
+        /// Spec requirements feed into traceability chain.
+        #[test]
+        fn spec_requirements_provide_traceability_ids() {
+            use thrum_core::spec::{Priority, Spec, SpecRequirement};
+
+            let spec = Spec {
+                requirements: vec![
+                    SpecRequirement {
+                        id: "REQ-FEAT-001".into(),
+                        description: "First requirement".into(),
+                        rationale: String::new(),
+                        priority: Priority::P1,
+                        safety_relevance: None,
+                    },
+                    SpecRequirement {
+                        id: "REQ-FEAT-002".into(),
+                        description: "Second requirement".into(),
+                        rationale: String::new(),
+                        priority: Priority::P2,
+                        safety_relevance: None,
+                    },
+                ],
+                ..Default::default()
+            };
+
+            let ids = spec.requirement_ids();
+            assert_eq!(ids, vec!["REQ-FEAT-001", "REQ-FEAT-002"]);
+        }
+
+        /// Proof obligations declared on a Spec keep their order and prover names.
+        #[test]
+        fn spec_proof_obligations_checked_at_gate2() {
+            use thrum_core::spec::{ProofObligation, Spec};
+
+            let spec = Spec {
+                proof_obligations: vec![
+                    ProofObligation {
+                        property: "Memory safety".into(),
+                        prover: "Z3".into(),
+                        proof_file: Some("proofs/safety.z3".into()),
+                    },
+                    ProofObligation {
+                        property: "Type soundness".into(),
+                        prover: "Rocq".into(),
+                        proof_file: Some("proofs/types.v".into()),
+                    },
+                ],
+                ..Default::default()
+            };
+
+            // NOTE(review): only the struct fields are asserted here; no Gate 2 check runs.
+            assert_eq!(spec.proof_obligations.len(), 2);
+            assert_eq!(spec.proof_obligations[0].prover, "Z3");
+            assert_eq!(spec.proof_obligations[1].prover, "Rocq");
+        }
+
+        /// Implementer receives spec as Markdown context.
+        #[test]
+        fn implementer_gets_spec_markdown() {
+            use thrum_core::spec::{DesignSpec, Priority, Spec, SpecRequirement};
+            use thrum_core::task::{RepoName, Task};
+
+            let spec = Spec {
+                title: "Implement feature".into(),
+                context: "Technical context".into(),
+                requirements: vec![SpecRequirement {
+                    id: "REQ-001".into(),
+                    description: "Must implement".into(),
+                    rationale: String::new(),
+                    priority: Priority::P1,
+                    safety_relevance: None,
+                }],
+                design: DesignSpec {
+                    approach: "Add module".into(),
+                    affected_files: vec!["src/new.rs".into()],
+                    ..Default::default()
+                },
+                acceptance_criteria: vec!["Works (TEST)".into()],
+                proof_obligations: Vec::new(),
+                test_plan: Vec::new(),
+            };
+
+            let mut task = Task::new(RepoName::new("test"), "Add feature".into(), "desc".into());
+            task.spec = Some(spec);
+
+            let prompt = build_implementation_prompt(&task, "auto/TASK-0001/test/add-feature");
+            // When spec exists, the prompt uses spec.to_markdown()
+            assert!(prompt.contains("# Implement feature"));
+            assert!(prompt.contains("REQ-001"));
+            assert!(prompt.contains("src/new.rs"));
+        }
+
+        /// Gate checks verify implementation matches spec.
+        #[test]
+        fn gate_checks_verify_spec_compliance() {
+            use thrum_core::gate::run_spec_compliance_check;
+            use thrum_core::spec::{ComplianceSeverity, DesignSpec, ProofObligation, Spec};
+
+            let spec = Spec {
+                design: DesignSpec {
+                    affected_files: vec!["src/lib.rs".into(), "src/extra.rs".into()],
+                    ..Default::default()
+                },
+                proof_obligations: vec![ProofObligation {
+                    property: "safety".into(),
+                    prover: "Z3".into(),
+                    proof_file: Some("proofs/safety.z3".into()),
+                }],
+                ..Default::default()
+            };
+
+            let changed = vec!["src/lib.rs".into()];
+            let result = run_spec_compliance_check(
+                &spec,
+                &changed,
+                std::path::Path::new("/nonexistent-test-root"),
+            );
+
+            // Should fail because proof file doesn't exist (Error severity)
+            assert!(!result.passed);
+            assert!(result.stdout.contains("safety.z3"));
+
+            // Separate test: only affected_files mismatch (Warning severity)
+            let spec_no_proofs = Spec {
+                design: DesignSpec {
+                    affected_files: vec!["src/missing.rs".into()],
+                    ..Default::default()
+                },
+                ..Default::default()
+            };
+            let issues = spec_no_proofs
+                .verify_implementation(&["src/lib.rs".into()], std::path::Path::new("/tmp"));
+            assert_eq!(issues.len(), 1);
+            assert_eq!(issues[0].severity, ComplianceSeverity::Warning);
+        }
+
+        /// Planner task JSON with embedded spec deserializes correctly.
+        ///
+        /// This validates the actual JSON deserialization path used by the planner
+        /// when it produces a spec as part of its output.
+        #[test]
+        fn planner_task_json_with_spec_deserializes() {
+            use thrum_core::spec::Spec;
+            use thrum_core::task::{RepoName, Task};
+
+            // Simulate the JSON the planner agent would produce
+            let planner_json = r#"[{
+                "repo": "loom",
+                "title": "Add safety check",
+                "description": "Implement safety verification",
+                "acceptance_criteria": ["Safety check works (TEST)"],
+                "requirement_id": "REQ-SAFETY-001",
+                "spec": {
+                    "title": "Safety check implementation",
+                    "context": "Required for ASIL compliance",
+                    "requirements": [{
+                        "id": "REQ-SAFETY-001",
+                        "description": "Safety verification must pass",
+                        "rationale": "ISO 26262 compliance",
+                        "priority": "P0",
+                        "safety_relevance": "ASIL B"
+                    }],
+                    "design": {
+                        "approach": "Add runtime checks",
+                        "affected_files": ["src/safety.rs", "src/lib.rs"],
+                        "interfaces": ["fn verify_safety() -> Result<()>"],
+                        "constraints": ["Must not add > 1ms latency"]
+                    },
+                    "acceptance_criteria": [
+                        "Safety check passes (TEST)",
+                        "No performance regression (BENCH)"
+                    ],
+                    "proof_obligations": [{
+                        "property": "Safety invariant holds",
+                        "prover": "Z3",
+                        "proof_file": "proofs/safety.z3"
+                    }],
+                    "test_plan": ["Test edge cases with malformed input"]
+                }
+            }]"#;
+
+            // This is the exact deserialization path used in invoke_planner()
+            #[derive(serde::Deserialize)]
+            #[allow(dead_code)]
+            struct PlannerTask {
+                repo: String,
+                title: String,
+                #[serde(default)]
+                description: String,
+                #[serde(default)]
+                acceptance_criteria: Vec<String>,
+                #[serde(default)]
+                requirement_id: Option<String>,
+                #[serde(default)]
+                spec: Option<Spec>,
+            }
+
+            let tasks: Vec<PlannerTask> = serde_json::from_str(planner_json).unwrap();
+            assert_eq!(tasks.len(), 1);
+
+            let pt = &tasks[0];
+            assert_eq!(pt.repo, "loom");
+            assert!(pt.spec.is_some());
+
+            let spec = pt.spec.as_ref().unwrap();
+            assert_eq!(spec.title, "Safety check implementation");
+            assert_eq!(spec.requirements.len(), 1);
+            assert_eq!(spec.requirements[0].id, "REQ-SAFETY-001");
+            assert_eq!(spec.requirements[0].safety_relevance, Some("ASIL B".into()));
+            assert_eq!(spec.design.affected_files.len(), 2);
+            assert_eq!(spec.proof_obligations.len(), 1);
+            assert_eq!(spec.proof_obligations[0].prover, "Z3");
+
+            // Simulate what invoke_planner does: create task and store spec
+            let mut task = Task::new(
+                RepoName::new(&pt.repo),
+                pt.title.clone(),
+                pt.description.clone(),
+            );
+            task.requirement_id = pt.requirement_id.clone();
+
+            if let Some(ref spec) = pt.spec {
+                task.acceptance_criteria = spec.tagged_acceptance_criteria();
+                if task.requirement_id.is_none() {
+                    task.requirement_id = spec.requirements.first().map(|r| r.id.clone());
+                }
+                task.spec = Some(spec.clone());
+            }
+
+            assert!(task.spec.is_some());
+            assert_eq!(task.requirement_id, Some("REQ-SAFETY-001".into()));
+            assert!(!task.acceptance_criteria.is_empty());
+        }
+
+        /// Spec requirement IDs feed into separate trace records per requirement.
+        #[test]
+        fn spec_requirements_emit_separate_trace_records() {
+            use thrum_core::spec::{Priority, Spec, SpecRequirement};
+            use thrum_core::traceability::{TraceArtifact, TraceRecord, TraceabilityMatrix};
+
+            let spec = Spec {
+                requirements: vec![
+                    SpecRequirement {
+                        id: "REQ-A".into(),
+                        description: "Requirement A".into(),
+                        rationale: "Reason A".into(),
+                        priority: Priority::P0,
+                        safety_relevance: None,
+                    },
+                    SpecRequirement {
+                        id: "REQ-B".into(),
+                        description: "Requirement B".into(),
+                        rationale: "Reason B".into(),
+                        priority: Priority::P1,
+                        safety_relevance: Some("ASIL D".into()),
+                    },
+                ],
+                ..Default::default()
+            };
+
+            // Simulate what run_task_pipeline does: emit trace records per requirement
+            let mut records = Vec::new();
+            for req in &spec.requirements {
+                records.push(TraceRecord {
+                    id: records.len() as i64 + 1,
+                    task_id: 42,
+                    requirement_id: req.id.clone(),
+                    artifact: TraceArtifact::Requirement {
+                        title: req.description.clone(),
+                        description: req.rationale.clone(),
+                    },
+                    created_at: chrono::Utc::now(),
+                });
+            }
+
+            // Verify that the traceability matrix has entries for each requirement
+            let matrix = TraceabilityMatrix::from_records(&records);
+            assert_eq!(matrix.entries.len(), 2);
+
+            let req_a = matrix
+                .entries
+                .iter()
+                .find(|e| e.requirement_id == "REQ-A")
+                .unwrap();
+            assert!(req_a.design.is_none()); // no design yet
+
+            let req_b = matrix
+                .entries
+                .iter()
+                .find(|e| e.requirement_id == "REQ-B")
+                .unwrap();
+            assert!(req_b.design.is_none());
+
+            // CSV export should list both requirement IDs
+            let csv = matrix.to_csv();
+            assert!(csv.contains("REQ-A"));
+            assert!(csv.contains("REQ-B"));
+        }
+
+        /// Spec proof obligations are checked via run_spec_proof_checks.
+        #[test]
+        fn run_spec_proof_checks_with_obligations() {
+            use thrum_core::gate::run_spec_proof_checks;
+            use thrum_core::repo::RepoConfig;
+            use thrum_core::spec::{ProofObligation, Spec};
+            use thrum_core::task::RepoName;
+
+            // Per-process dir so a concurrent run's cleanup cannot delete our files.
+            let tmp = std::env::temp_dir().join(format!("thrum-proof-{}", std::process::id()));
+            std::fs::create_dir_all(&tmp).unwrap();
+            // Create a proof file to satisfy one obligation
+            let proof_path = tmp.join("existing.z3");
+            std::fs::write(&proof_path, "(check-sat)").unwrap();
+
+            let spec = Spec {
+                proof_obligations: vec![
+                    ProofObligation {
+                        property: "Existing proof".into(),
+                        prover: "Z3".into(),
+                        proof_file: Some("existing.z3".into()),
+                    },
+                    ProofObligation {
+                        property: "Missing proof".into(),
+                        prover: "Rocq".into(),
+                        proof_file: Some("missing.v".into()),
+                    },
+                ],
+                ..Default::default()
+            };
+
+            let repo = RepoConfig {
+                name: RepoName::new("test"),
+                path: tmp.clone(),
+                build_cmd: "true".into(),
+                test_cmd: "true".into(),
+                fmt_cmd: "true".into(),
+                lint_cmd: "true".into(),
+                verify_cmd: None, // no verify_cmd, so falls back to file existence check
+                proofs_cmd: None,
+                claude_md: None,
+                safety_target: None,
+                ci: None,
+                checks: thrum_core::repo::default_checks(),
+                mutants: None,
+                trust: None,
+            };
+
+            let checks = run_spec_proof_checks(&spec, &repo).unwrap();
+
+            // Should produce checks for both obligations
+            assert_eq!(checks.len(), 2);
+
+            // Existing proof file should pass
+            let z3_check = checks.iter().find(|c| c.name.contains("z3")).unwrap();
+            assert!(z3_check.passed, "Z3 proof file exists and should pass");
+
+            // Missing proof file should fail
+            let rocq_check = checks.iter().find(|c| c.name.contains("rocq")).unwrap();
+            assert!(
+                !rocq_check.passed,
+                "Rocq proof file missing and should fail"
+            );
+
+            // Cleanup
+            let _ = std::fs::remove_dir_all(&tmp);
+        }
+
+        /// Spec acceptance_criteria become tagged criteria on the task.
+        #[test]
+        fn spec_acceptance_criteria_become_tagged() {
+            use thrum_core::spec::Spec;
+            use thrum_core::task::{RepoName, Task};
+            use thrum_core::verification;
+
+            let spec = Spec {
+                title: "Tagged criteria test".into(),
+                acceptance_criteria: vec![
+                    "Tests pass (TEST)".into(),
+                    "Dashboard shows results (BROWSER)".into(),
+                    "Lint clean (LINT)".into(),
+                ],
+                ..Default::default()
+            };
+
+            let mut task = Task::new(RepoName::new("test"), "Test".into(), "desc".into());
+            task.acceptance_criteria = spec.tagged_acceptance_criteria();
+            let audit = verification::audit_criteria(&task.acceptance_criteria);
+            task.tagged_criteria = audit.tagged_criteria;
+
+            assert_eq!(task.tagged_criteria.len(), 3);
+            // Each criterion should have a tag
+            for tc in &task.tagged_criteria {
+                assert!(
+                    !tc.tag.as_tag_str().is_empty(),
+                    "criterion '{}' should have a tag",
+                    tc.description
+                );
+            }
         }
     }
 }
diff --git a/crates/thrum-runner/src/sandbox.rs b/crates/thrum-runner/src/sandbox.rs
index e0f085a..29394a6 100644
--- a/crates/thrum-runner/src/sandbox.rs
+++ b/crates/thrum-runner/src/sandbox.rs
@@ -376,7 +376,7 @@ pub async fn create_sandbox(config: &SandboxConfig) -> Box<dyn Sandbox> {
             Box::new(OsNativeSandbox::new(config.clone()))
         }
         _ => {
-            if config.backend != "none" {
+            if config.backend != "none" && config.backend != "observe" {
                 tracing::warn!(backend = %config.backend, "unknown sandbox backend, using passthrough");
             }
             tracing::info!("using passthrough (no sandbox)");
@@ -385,6 +385,278 @@ pub async fn create_sandbox(config: &SandboxConfig) -> Box<dyn Sandbox> {
     }
 }
 
+/// Reports whether `config` selects the "observe" sandbox backend.
+pub fn is_observe_mode(config: &SandboxConfig) -> bool {
+    matches!(config.backend.as_str(), "observe")
+}
+
+/// Write a macOS seatbelt profile to a temp file for sandbox-exec.
+///
+/// The profile is deny-by-default and then allows:
+/// - **Write**: `work_dir`, `scratch_dir`, git worktree metadata, temp dirs,
+///   `/dev`, and tool caches under `$HOME` (cargo, claude, npm, bun)
+/// - **Read**: unrestricted (`file-read*`)
+/// - **Network** and **process/IPC**: allowed (agents need API access)
+///
+/// Returns the path to a uniquely named profile file (caller cleans up).
+pub fn write_seatbelt_profile(work_dir: &Path, scratch_dir: &Path) -> Result<PathBuf> {
+    // sandbox-exec requires absolute paths in subpath rules.
+    let work_dir = std::fs::canonicalize(work_dir)
+        .unwrap_or_else(|_| std::env::current_dir().unwrap_or_default().join(work_dir));
+    let scratch_dir = std::fs::canonicalize(scratch_dir).unwrap_or_else(|_| {
+        std::env::current_dir()
+            .unwrap_or_default()
+            .join(scratch_dir)
+    });
+    let home = std::env::var("HOME").unwrap_or_else(|_| "/Users/nobody".into());
+
+    // On macOS, $TMPDIR is /private/var/folders/xx/.../T/, NOT /tmp.
+    // Node.js and Bun write V8 code cache and temp files here.
+    let tmpdir = std::env::temp_dir();
+    let tmpdir = std::fs::canonicalize(&tmpdir).unwrap_or(tmpdir);
+
+    // Git worktrees: the worktree dir (work_dir) contains a `.git` *file*
+    // pointing to `<repo_root>/.git/worktrees/<name>`. Git commit/branch/ref
+    // operations write to that directory, not the worktree itself. We must
+    // allow writes there or agents cannot commit.
+    //
+    // Additionally, git worktrees share the main repo's objects/ and refs/
+    // directories. `git add` writes blob objects to .git/objects/ and
+    // `git commit` updates refs in .git/refs/. Without write access to
+    // the common git dir, agents in worktrees cannot commit at all.
+    let (git_worktrees_dir, git_common_dir) = {
+        let gitdir_file = work_dir.join(".git");
+        if gitdir_file.is_file() {
+            // Read the gitdir pointer: "gitdir: /path/to/.git/worktrees/<name>"
+            let worktree_git_dir = std::fs::read_to_string(&gitdir_file)
+                .ok()
+                .and_then(|content| {
+                    content
+                        .strip_prefix("gitdir: ")
+                        .map(|p| PathBuf::from(p.trim()))
+                });
+
+            // Resolve the common dir (the main .git directory) which contains
+            // the shared objects/ and refs/ directories.
+            let common_dir = worktree_git_dir.as_ref().and_then(|wt_dir| {
+                let commondir_file = wt_dir.join("commondir");
+                std::fs::read_to_string(&commondir_file).ok().map(|rel| {
+                    let rel = rel.trim();
+                    if Path::new(rel).is_absolute() {
+                        PathBuf::from(rel)
+                    } else {
+                        // commondir is relative to the worktree git dir
+                        let resolved = wt_dir.join(rel);
+                        std::fs::canonicalize(&resolved).unwrap_or(resolved)
+                    }
+                })
+            });
+
+            (worktree_git_dir, common_dir)
+        } else {
+            (None, None)
+        }
+    };
+
+    let mut git_rules = String::new();
+    if let Some(d) = &git_worktrees_dir {
+        git_rules.push_str(&format!(
+            "    ;; Git worktree metadata (refs, HEAD, index)\n    (subpath \"{}\")\n",
+            d.display()
+        ));
+    }
+    if let Some(d) = &git_common_dir {
+        // Grant write access only to the specific subdirectories agents need,
+        // NOT the entire .git/ directory. This prevents agents from modifying
+        // .git/config (which can set core.bare=true and break the repo).
+        for subdir in &["objects", "refs", "info", "logs"] {
+            git_rules.push_str(&format!(
+                "    ;; Git shared {subdir}\n    (subpath \"{}/{}\")\n",
+                d.display(),
+                subdir
+            ));
+        }
+        // Agents also need to write packed-refs and shallow (top-level files).
+        for file in &["packed-refs", "shallow", "FETCH_HEAD"] {
+            git_rules.push_str(&format!("    (literal \"{}/{}\")\n", d.display(), file));
+        }
+    }
+
+    let profile = format!(
+        r#"(version 1)
+(deny default)
+
+;; Process lifecycle
+(allow process*)
+(allow signal)
+
+;; macOS IPC, Mach, sysctl (required for system frameworks, dyld, etc.)
+(allow sysctl*)
+(allow mach*)
+(allow ipc*)
+
+;; Network (agents need API access for LLM calls)
+(allow network*)
+
+;; Read access — unrestricted. Restricting reads breaks dyld, system
+;; frameworks, and node/npm in hard-to-predict ways. The security
+;; boundary is on *writes*.
+(allow file-read*)
+
+;; Write access — only worktree, scratch, temp, and essential caches.
+;; This is the core sandbox constraint: agents cannot write outside
+;; their designated working area.
+(allow file-write*
+    (subpath "{work_dir}")
+    (subpath "{scratch_dir}")
+{git_rules}
+    (subpath "/private/tmp")
+    (subpath "/tmp")
+    (subpath "/dev")
+    ;; Cargo build cache (shared across agents)
+    (subpath "{home}/.cargo/registry")
+    (subpath "{home}/.cargo/git")
+    ;; Claude session state
+    (subpath "{home}/.claude")
+    ;; npm/npx cache — required for MCP server spawning via npx.
+    ;; Without this, npm hangs writing debug logs and MCP servers never start.
+    (subpath "{home}/.npm")
+    ;; Bun runtime cache — Claude CLI is a Bun binary.
+    (subpath "{home}/.bun")
+    ;; macOS per-user temp directory ($TMPDIR != /tmp on macOS).
+    ;; Node.js writes V8 compiled code cache here.
+    (subpath "{tmpdir}")
+)
+"#,
+        work_dir = work_dir.display(),
+        scratch_dir = scratch_dir.display(),
+        tmpdir = tmpdir.display(),
+    );
+
+    // Counter keeps names unique when several profiles are written in the same millisecond.
+    static PROFILE_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
+    let seq = PROFILE_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+    let profile_path = std::env::temp_dir().join(format!(
+        "thrum-seatbelt-{}-{}-{seq}.sb",
+        std::process::id(),
+        std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_millis()
+    ));
+    std::fs::write(&profile_path, &profile).context("failed to write seatbelt profile")?;
+
+    tracing::debug!(
+        profile = %profile_path.display(),
+        work_dir = %work_dir.display(),
+        scratch_dir = %scratch_dir.display(),
+        "wrote seatbelt sandbox profile"
+    );
+
+    Ok(profile_path)
+}
+
+/// Audit file writes after an observe-mode run.
+///
+/// Checks the paths reported by `git status --porcelain -uall` in `work_dir`
+/// against a hard-coded allow-list and logs a warning for each path outside
+/// it. Returns the offending paths as printed by git (empty if git fails).
+///
+/// This is deliberately conservative: it inspects only what git reports for
+/// the worktree (changed and untracked files) instead of tracing syscalls.
+/// NOTE(review): every reported path is joined onto `work_dir`, which is itself
+/// allowed, so this looks unable to flag anything — confirm intended behavior.
+pub fn audit_observe_violations(work_dir: &Path, scratch_dir: &Path) -> Vec<String> {
+    let home = std::env::var("HOME").unwrap_or_else(|_| "/Users/nobody".into());
+    let home = Path::new(&home);
+
+    // Allowed write paths: a subset of the seatbelt profile (no npm/bun/$TMPDIR/git dirs).
+    let allowed: Vec<std::path::PathBuf> = vec![
+        work_dir.to_path_buf(),
+        scratch_dir.to_path_buf(),
+        PathBuf::from("/private/tmp"),
+        PathBuf::from("/tmp"),
+        PathBuf::from("/dev"),
+        home.join(".cargo/registry"),
+        home.join(".cargo/git"),
+        home.join(".claude"),
+    ];
+
+    let is_allowed = |path: &Path| -> bool {
+        let abs = if path.is_absolute() {
+            path.to_path_buf()
+        } else {
+            work_dir.join(path)
+        };
+        allowed.iter().any(|a| abs.starts_with(a))
+    };
+
+    // Use git status to find modified/created files in the worktree.
+    let output = std::process::Command::new("git")
+        .args(["status", "--porcelain", "-uall"])
+        .current_dir(work_dir)
+        .output();
+
+    let mut violations = Vec::new();
+
+    match output {
+        Ok(out) if out.status.success() => {
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            for line in stdout.lines() {
+                // porcelain format: XY filename (or XY old -> new for renames)
+                if line.len() < 4 {
+                    continue;
+                }
+                let file_part = &line[3..];
+                // Handle renames: "old -> new"
+                let filename = file_part.split(" -> ").last().unwrap_or(file_part);
+                let path = work_dir.join(filename);
+                if !is_allowed(&path) {
+                    violations.push(filename.to_string());
+                }
+            }
+        }
+        Ok(out) => {
+            tracing::debug!(
+                stderr = %String::from_utf8_lossy(&out.stderr),
+                "git status failed during observe audit"
+            );
+        }
+        Err(e) => {
+            tracing::debug!(error = %e, "could not run git status for observe audit");
+        }
+    }
+
+    if violations.is_empty() {
+        tracing::info!(
+            work_dir = %work_dir.display(),
+            "sandbox observe: all writes within allowed paths"
+        );
+    } else {
+        for v in &violations {
+            tracing::warn!(
+                file = %v,
+                work_dir = %work_dir.display(),
+                "sandbox observe: write WOULD BE DENIED under enforcement"
+            );
+        }
+    }
+
+    violations
+}
+
+/// Ensure the per-task scratch directory exists, creating parents as needed.
+///
+/// The directory is `<base_dir>/scratch/<task_slug>` (e.g. `scratch/TASK-0042/`).
+/// Returns that path; a creation failure is returned with the path in its
+/// context message.
+pub fn create_scratch_dir(base_dir: &Path, task_slug: &str) -> Result<PathBuf> {
+    let dir = base_dir.join("scratch").join(task_slug);
+    std::fs::create_dir_all(&dir)
+        .with_context(|| format!("failed to create scratch dir: {}", dir.display()))?;
+    Ok(dir)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -415,4 +687,95 @@ mod tests {
         let sandbox = create_sandbox(&config).await;
         assert_eq!(sandbox.name(), "none");
     }
+
+    #[test]
+    fn seatbelt_profile_written_to_disk() {
+        let work = tempfile::tempdir().unwrap();
+        let scratch = tempfile::tempdir().unwrap();
+        let path = write_seatbelt_profile(work.path(), scratch.path()).unwrap();
+        assert!(path.exists(), "profile file should be written");
+        let content = std::fs::read_to_string(&path).unwrap();
+        assert!(content.contains("(version 1)"));
+        assert!(content.contains(&work.path().display().to_string()));
+        assert!(content.contains(&scratch.path().display().to_string()));
+        std::fs::remove_file(path).unwrap();
+    }
+
+    #[test]
+    fn scratch_dir_created() {
+        let base = tempfile::tempdir().unwrap();
+        let scratch = create_scratch_dir(base.path(), "TASK-0042").unwrap();
+        assert!(scratch.exists());
+        assert!(scratch.ends_with("scratch/TASK-0042"));
+    }
+
+    #[test]
+    fn is_observe_mode_returns_true_for_observe() {
+        let config = SandboxConfig {
+            backend: "observe".into(),
+            ..Default::default()
+        };
+        assert!(is_observe_mode(&config));
+    }
+
+    #[test]
+    fn is_observe_mode_returns_false_for_others() {
+        for backend in &["none", "os-native", "docker"] {
+            let config = SandboxConfig {
+                backend: backend.to_string(),
+                ..Default::default()
+            };
+            assert!(!is_observe_mode(&config), "should be false for {backend}");
+        }
+    }
+
+    #[tokio::test]
+    async fn create_sandbox_observe_uses_passthrough() {
+        let config = SandboxConfig {
+            backend: "observe".into(),
+            ..Default::default()
+        };
+        let sandbox = create_sandbox(&config).await;
+        // Observe mode falls through to NoSandbox (no enforcement).
+        assert_eq!(sandbox.name(), "none");
+    }
+
+    #[test]
+    fn audit_observe_in_git_repo_no_violations() {
+        // Set up a temp git repo with no uncommitted changes.
+        let dir = tempfile::tempdir().unwrap();
+        let scratch = tempfile::tempdir().unwrap();
+        std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.email", "test@test.com"])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["config", "user.name", "Test"])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+        std::fs::write(dir.path().join("file.txt"), "hello").unwrap();
+        std::process::Command::new("git")
+            .args(["add", "."])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+        std::process::Command::new("git")
+            .args(["commit", "-m", "init"])
+            .current_dir(dir.path())
+            .output()
+            .unwrap();
+
+        let violations = audit_observe_violations(dir.path(), scratch.path());
+        assert!(
+            violations.is_empty(),
+            "clean repo should have no violations"
+        );
+    }
 }
diff --git a/crates/thrum-runner/src/shutdown.rs b/crates/thrum-runner/src/shutdown.rs
new file mode 100644
index 0000000..37e8f74
--- /dev/null
+++ b/crates/thrum-runner/src/shutdown.rs
@@ -0,0 +1,695 @@
+//! Graceful shutdown and startup recovery for the engine.
+//!
+//! Provides:
+//! - **Process tracking**: Global registry of spawned child process PIDs, enabling
+//!   clean SIGTERM→SIGKILL escalation on shutdown.
+//! - **Startup recovery**: Scans for orphaned worktrees, orphaned `claude -p`
+//!   processes, stuck tasks, and dirty main-repo state.
+//! - **Shutdown cleanup**: Kills tracked processes, resets in-flight tasks,
+//!   removes worktrees created during this run, and checks the main repo.
+
+use anyhow::Result;
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use tokio::sync::Mutex;
+
+/// Registry of child process PIDs spawned by this engine run.
+///
+/// Subprocess functions register PIDs on spawn and unregister on exit.
+/// During graceful shutdown, all registered PIDs receive SIGTERM, then
+/// SIGKILL after a timeout.
+#[derive(Clone, Default)]
+pub struct ProcessTracker {
+    pids: Arc<Mutex<HashSet<u32>>>,
+}
+
+impl ProcessTracker {
+    pub fn new() -> Self {
+        Self {
+            pids: Arc::new(Mutex::new(HashSet::new())),
+        }
+    }
+
+    /// Register a child process PID.
+    pub async fn register(&self, pid: u32) {
+        self.pids.lock().await.insert(pid);
+    }
+
+    /// Unregister a child process PID (it exited normally).
+    pub async fn unregister(&self, pid: u32) {
+        self.pids.lock().await.remove(&pid);
+    }
+
+    /// Get a snapshot of all currently tracked PIDs.
+    pub async fn tracked_pids(&self) -> Vec<u32> {
+        self.pids.lock().await.iter().copied().collect()
+    }
+
+    /// Send SIGTERM to all tracked processes, wait up to `grace_period`,
+    /// then SIGKILL any survivors.
+    pub async fn kill_all(&self, grace_period: std::time::Duration) {
+        let pids = self.tracked_pids().await;
+        if pids.is_empty() {
+            return;
+        }
+
+        tracing::info!(
+            count = pids.len(),
+            "sending SIGTERM to tracked agent processes"
+        );
+
+        for &pid in &pids {
+            send_signal(pid, Signal::Term);
+        }
+
+        // Wait for processes to exit, checking periodically.
+        let start = tokio::time::Instant::now();
+        let check_interval = std::time::Duration::from_secs(1);
+
+        loop {
+            tokio::time::sleep(check_interval).await;
+            let alive: Vec<u32> = pids
+                .iter()
+                .copied()
+                .filter(|&p| is_process_alive(p))
+                .collect();
+            if alive.is_empty() {
+                tracing::info!("all agent processes exited after SIGTERM");
+                break;
+            }
+            if start.elapsed() >= grace_period {
+                tracing::warn!(
+                    count = alive.len(),
+                    "grace period expired — sending SIGKILL to remaining processes"
+                );
+                for &pid in &alive {
+                    send_signal(pid, Signal::Kill);
+                }
+                break;
+            }
+        }
+
+        // Clear the tracker.
+        self.pids.lock().await.clear();
+    }
+}
+
+/// Unix signal types we send during shutdown.
+#[derive(Debug, Clone, Copy)]
+enum Signal {
+    Term,
+    Kill,
+}
+
+/// Send a signal to one process. Best-effort: kill() errors (e.g. ESRCH) are ignored.
+/// PIDs that are 0 or do not fit a positive `pid_t` are skipped, never signalled.
+fn send_signal(pid: u32, sig: Signal) {
+    #[cfg(unix)]
+    {
+        let signal = match sig {
+            Signal::Term => libc::SIGTERM,
+            Signal::Kill => libc::SIGKILL,
+        };
+        // Safety: kill() takes plain integers. Only a positive pid_t is passed:
+        // kill(0, ..) / kill(<0, ..) would hit process groups or every process.
+        if let Ok(target @ 1..) = libc::pid_t::try_from(pid) {
+            unsafe { libc::kill(target, signal) };
+        }
+    }
+    #[cfg(not(unix))]
+    {
+        let _ = (pid, sig);
+        tracing::warn!("process signaling not supported on this platform");
+    }
+}
+
+/// Check if a process is still alive. PIDs that are 0 or do not fit a positive
+/// `pid_t` never name a single process and are reported as not alive.
+fn is_process_alive(pid: u32) -> bool {
+    #[cfg(unix)]
+    {
+        // kill(p, 0) probes existence without signalling; any failure counts as dead.
+        matches!(libc::pid_t::try_from(pid), Ok(p @ 1..) if unsafe { libc::kill(p, 0) == 0 })
+    }
+    #[cfg(not(unix))]
+    {
+        let _ = pid;
+        false
+    }
+}
+
+// ─── Startup Recovery ───────────────────────────────────────────────────────
+
+/// Scan for orphaned `claude` processes spawned by a previous engine run.
+///
+/// Identifies processes whose command line references `thrum-sysprompt` temp
+/// files (the marker for agent system prompts). Returns the list of killed PIDs.
+pub fn kill_orphaned_claude_processes() -> Vec<u32> {
+    let mut killed = Vec::new();
+
+    #[cfg(unix)]
+    {
+        // Use `ps` to find claude processes with thrum-sysprompt in their args.
+        let output = std::process::Command::new("ps").args(["aux"]).output();
+
+        let output = match output {
+            Ok(o) => o,
+            Err(e) => {
+                tracing::warn!(error = %e, "failed to run ps for orphan detection");
+                return killed;
+            }
+        };
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        for line in stdout.lines() {
+            // Match lines that contain both "claude" and "thrum-sysprompt"
+            // but NOT our own PID (don't kill ourselves).
+            if line.contains("thrum-sysprompt") && line.contains("claude") {
+                // Parse PID from ps output (second whitespace-delimited field).
+                let parts: Vec<&str> = line.split_whitespace().collect();
+                if parts.len() >= 2
+                    && let Ok(pid) = parts[1].parse::<u32>()
+                {
+                    let my_pid = std::process::id();
+                    if pid != my_pid {
+                        tracing::warn!(pid, "killing orphaned claude process from previous run");
+                        send_signal(pid, Signal::Term);
+                        killed.push(pid);
+                    }
+                }
+            }
+        }
+    }
+
+    #[cfg(not(unix))]
+    {
+        tracing::debug!("orphaned process scanning not supported on this platform");
+    }
+
+    killed
+}
+
+/// Scan the worktrees directory for orphaned worktrees and remove them.
+///
+/// A worktree is considered orphaned if it exists on disk but has no
+/// corresponding in-flight task. During startup recovery we assume ALL
+/// worktrees are orphaned (no agents should be running at startup).
+pub fn cleanup_orphaned_worktrees(worktrees_dir: &Path, repos_config: &[PathBuf]) -> usize {
+    if !worktrees_dir.exists() {
+        return 0;
+    }
+
+    let entries = match std::fs::read_dir(worktrees_dir) {
+        Ok(e) => e,
+        Err(e) => {
+            tracing::warn!(
+                error = %e,
+                dir = %worktrees_dir.display(),
+                "failed to read worktrees directory"
+            );
+            return 0;
+        }
+    };
+
+    let mut cleaned = 0;
+
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if !path.is_dir() {
+            continue;
+        }
+
+        tracing::warn!(
+            worktree = %path.display(),
+            "removing orphaned worktree from previous run"
+        );
+
+        // Try `git worktree remove --force` from each known repo.
+        let mut removed = false;
+        for repo_path in repos_config {
+            let result = std::process::Command::new("git")
+                .args(["worktree", "remove", "--force", &path.to_string_lossy()])
+                .current_dir(repo_path)
+                .env_remove("GIT_DIR")
+                .env_remove("GIT_INDEX_FILE")
+                .env_remove("GIT_WORK_TREE")
+                .output();
+
+            if let Ok(output) = result
+                && output.status.success()
+            {
+                removed = true;
+                break;
+            }
+        }
+
+        // If git worktree remove didn't work, force-remove the directory.
+        if !removed && path.exists() {
+            if let Err(e) = std::fs::remove_dir_all(&path) {
+                tracing::warn!(
+                    error = %e,
+                    worktree = %path.display(),
+                    "failed to force-remove orphaned worktree"
+                );
+            } else {
+                removed = true;
+            }
+        }
+
+        if removed {
+            tracing::info!(worktree = %path.display(), "cleaned up orphaned worktree");
+            cleaned += 1;
+        }
+    }
+
+    // Prune worktree metadata in all repos.
+    for repo_path in repos_config {
+        let _ = std::process::Command::new("git")
+            .args(["worktree", "prune"])
+            .current_dir(repo_path)
+            .env_remove("GIT_DIR")
+            .env_remove("GIT_INDEX_FILE")
+            .env_remove("GIT_WORK_TREE")
+            .output();
+    }
+
+    cleaned
+}
+
+/// Check git status of a repository for uncommitted changes.
+///
+/// Returns a human-readable summary if the repo is dirty, or `None` if clean.
+pub fn check_repo_dirty(repo_path: &Path) -> Option<String> {
+    let git = match crate::git::GitRepo::open(repo_path) {
+        Ok(g) => g,
+        Err(e) => {
+            tracing::warn!(
+                error = %e,
+                path = %repo_path.display(),
+                "failed to open repo for dirty check"
+            );
+            return None;
+        }
+    };
+
+    match git.is_clean() {
+        Ok(true) => None,
+        Ok(false) => {
+            // Get a quick summary via git status.
+            let output = std::process::Command::new("git")
+                .args(["status", "--porcelain"])
+                .current_dir(repo_path)
+                .env_remove("GIT_DIR")
+                .env_remove("GIT_INDEX_FILE")
+                .env_remove("GIT_WORK_TREE")
+                .output();
+
+            let detail = match output {
+                Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
+                Err(_) => "(unable to get details)".to_string(),
+            };
+
+            Some(detail)
+        }
+        Err(e) => {
+            tracing::warn!(
+                error = %e,
+                path = %repo_path.display(),
+                "failed to check repo cleanliness"
+            );
+            None
+        }
+    }
+}
+
+/// Run all startup recovery actions.
+///
+/// Called at the beginning of `run_parallel` before dispatching any agents.
+/// Logs all recovery actions clearly so the operator knows what was cleaned up.
+pub fn run_startup_recovery(
+    db: &redb::Database,
+    event_bus: &crate::event_bus::EventBus,
+    worktrees_dir: &Path,
+    repos_config: &thrum_core::repo::ReposConfig,
+) -> Result<()> {
+    use thrum_core::event::{EventKind, LogLevel};
+
+    tracing::info!("running startup recovery checks");
+
+    // 1. Kill orphaned claude processes.
+    let killed = kill_orphaned_claude_processes();
+    if !killed.is_empty() {
+        let msg = format!(
+            "startup recovery: killed {} orphaned claude process(es) (PIDs: {:?})",
+            killed.len(),
+            killed
+        );
+        tracing::warn!("{msg}");
+        event_bus.emit(EventKind::EngineLog {
+            level: LogLevel::Warn,
+            message: msg,
+        });
+    }
+
+    // 2. Scan and clean orphaned worktrees.
+    let repo_paths: Vec<PathBuf> = repos_config.repo.iter().map(|r| r.path.clone()).collect();
+    let cleaned = cleanup_orphaned_worktrees(worktrees_dir, &repo_paths);
+    if cleaned > 0 {
+        let msg = format!(
+            "startup recovery: removed {cleaned} orphaned worktree(s) from {}",
+            worktrees_dir.display()
+        );
+        tracing::warn!("{msg}");
+        event_bus.emit(EventKind::EngineLog {
+            level: LogLevel::Warn,
+            message: msg,
+        });
+    }
+
+    // 3. Check all managed repos for uncommitted changes.
+    for repo in &repos_config.repo {
+        if let Some(dirty_detail) = check_repo_dirty(&repo.path) {
+            let trimmed: String = dirty_detail.lines().take(10).collect::<Vec<_>>().join(", ");
+            tracing::warn!(
+                repo = %repo.name,
+                path = %repo.path.display(),
+                files = trimmed,
+                "repo has uncommitted changes — agent work may have leaked from a previous run"
+            );
+            event_bus.emit(EventKind::EngineLog {
+                level: LogLevel::Warn,
+                message: format!(
+                    "startup recovery: repo '{}' has uncommitted changes: {}",
+                    repo.name, trimmed
+                ),
+            });
+        }
+    }
+
+    // 4. Recover stuck tasks (already existed, now integrated into this flow).
+    recover_stuck_tasks(db, event_bus)?;
+
+    // 5. Clean up stale thrum-sysprompt temp files.
+    cleanup_stale_sysprompt_files();
+
+    tracing::info!("startup recovery checks complete");
+    Ok(())
+}
+
+/// Recover tasks stuck in transient states from a previous engine run.
+///
+/// On engine startup, any tasks in "claimed", "implementing", or "integrating"
+/// state are orphaned (their agent is no longer running). This function resets
+/// them to a re-dispatchable state so they don't stay stuck forever.
+pub fn recover_stuck_tasks(
+    db: &redb::Database,
+    event_bus: &crate::event_bus::EventBus,
+) -> Result<()> {
+    use thrum_core::event::EventKind;
+    use thrum_core::task::TaskStatus;
+    use thrum_db::task_store::TaskStore;
+
+    let task_store = TaskStore::new(db);
+    let all_tasks = task_store.list(None, None)?;
+    let mut recovered = 0;
+
+    for mut task in all_tasks {
+        let reset_to = match &task.status {
+            TaskStatus::Claimed { .. } | TaskStatus::Implementing { .. } => {
+                // Agent was working on this but the engine stopped.
+                // Reset to Pending so it gets re-dispatched.
+                Some(TaskStatus::Pending)
+            }
+            TaskStatus::Integrating => {
+                // Post-approval integration was in progress.
+                // Reset to Approved so it re-enters the integration path.
+                Some(TaskStatus::Approved)
+            }
+            TaskStatus::Reviewing { .. } => {
+                // Review was interrupted. Rather than resuming at the review step,
+                // reset to Pending to run the full pipeline again (safe, gates will catch issues).
+                Some(TaskStatus::Pending)
+            }
+            _ => None,
+        };
+
+        if let Some(new_status) = reset_to {
+            let old_label = task.status.label().to_string();
+            let new_label = new_status.label();
+            tracing::warn!(
+                task_id = %task.id,
+                from = old_label,
+                to = new_label,
+                "recovering stuck task from previous engine run"
+            );
+            task.status = new_status;
+            task.updated_at = chrono::Utc::now();
+            task_store.update(&task)?;
+            recovered += 1;
+
+            event_bus.emit(EventKind::TaskStateChange {
+                task_id: task.id.clone(),
+                repo: task.repo.clone(),
+                from: old_label,
+                to: task.status.label().to_string(),
+            });
+        }
+    }
+
+    if recovered > 0 {
+        tracing::info!(count = recovered, "recovered stuck tasks");
+        event_bus.emit(EventKind::EngineLog {
+            level: thrum_core::event::LogLevel::Info,
+            message: format!("recovered {recovered} stuck tasks from previous run"),
+        });
+    }
+
+    Ok(())
+}
+
+/// Clean up stale `thrum-sysprompt-*.md` temp files from previous runs.
+fn cleanup_stale_sysprompt_files() {
+    let tmp = std::env::temp_dir();
+    let entries = match std::fs::read_dir(&tmp) {
+        Ok(e) => e,
+        Err(_) => return,
+    };
+
+    let my_pid = std::process::id();
+    let mut cleaned = 0;
+
+    for entry in entries.flatten() {
+        let name = entry.file_name();
+        let name_str = name.to_string_lossy();
+        if name_str.starts_with("thrum-sysprompt-") && name_str.ends_with(".md") {
+            // Extract PID from filename: thrum-sysprompt-{pid}.md
+            let pid_str = name_str
+                .strip_prefix("thrum-sysprompt-")
+                .and_then(|s| s.strip_suffix(".md"));
+
+            if let Some(pid_str) = pid_str
+                && let Ok(pid) = pid_str.parse::<u32>()
+            {
+                // Don't delete our own temp file.
+                if pid == my_pid {
+                    continue;
+                }
+                // Delete if the owning process is no longer alive.
+                if !is_process_alive(pid) {
+                    let _ = std::fs::remove_file(entry.path());
+                    cleaned += 1;
+                }
+            }
+        }
+    }
+
+    if cleaned > 0 {
+        tracing::info!(
+            count = cleaned,
+            "cleaned up stale thrum-sysprompt temp files"
+        );
+    }
+}
+
+// ─── Shutdown Cleanup ───────────────────────────────────────────────────────
+
+/// Run all shutdown cleanup actions.
+///
+/// Called after the dispatch loop exits (either from Ctrl+C/SIGTERM or
+/// natural completion). Ensures no orphaned state is left behind.
+pub async fn run_shutdown_cleanup(
+    db: &redb::Database,
+    event_bus: &crate::event_bus::EventBus,
+    process_tracker: &ProcessTracker,
+    worktrees_dir: &Path,
+    repos_config: &thrum_core::repo::ReposConfig,
+    grace_period: std::time::Duration,
+) {
+    use thrum_core::event::{EventKind, LogLevel};
+
+    tracing::info!("running shutdown cleanup");
+
+    // 1. Kill all tracked agent processes.
+    process_tracker.kill_all(grace_period).await;
+
+    // 2. Reset any in-flight tasks back to dispatchable states.
+    match reset_inflight_tasks(db) {
+        Ok(count) => {
+            if count > 0 {
+                let msg = format!("shutdown: reset {count} in-flight task(s)");
+                tracing::info!("{msg}");
+                event_bus.emit(EventKind::EngineLog {
+                    level: LogLevel::Info,
+                    message: msg,
+                });
+            }
+        }
+        Err(e) => {
+            tracing::error!(error = %e, "failed to reset in-flight tasks during shutdown");
+        }
+    }
+
+    // 3. Clean up worktrees.
+    let repo_paths: Vec<PathBuf> = repos_config.repo.iter().map(|r| r.path.clone()).collect();
+    let cleaned = cleanup_orphaned_worktrees(worktrees_dir, &repo_paths);
+    if cleaned > 0 {
+        tracing::info!(count = cleaned, "shutdown: cleaned up worktrees");
+    }
+
+    // 4. Check repos for leaked modifications.
+    for repo in &repos_config.repo {
+        if let Some(dirty_detail) = check_repo_dirty(&repo.path) {
+            let trimmed: String = dirty_detail.lines().take(10).collect::<Vec<_>>().join(", ");
+            tracing::warn!(
+                repo = %repo.name,
+                files = trimmed,
+                "shutdown: repo has unexpected modifications (may need manual cleanup)"
+            );
+            event_bus.emit(EventKind::EngineLog {
+                level: LogLevel::Warn,
+                message: format!(
+                    "shutdown: repo '{}' has modifications: {}",
+                    repo.name, trimmed
+                ),
+            });
+        }
+    }
+
+    // 5. Clean up sysprompt temp files from this run. Removal is attempted
+    //    unconditionally and any error (e.g. the file was never created) is
+    //    ignored, which avoids a racy exists-then-remove check.
+    let tmp = std::env::temp_dir();
+    let my_pid = std::process::id();
+    let my_sysprompt = tmp.join(format!("thrum-sysprompt-{my_pid}.md"));
+    let _ = std::fs::remove_file(&my_sysprompt);
+
+    tracing::info!("shutdown cleanup complete");
+    event_bus.emit(EventKind::EngineLog {
+        level: LogLevel::Info,
+        message: "shutdown cleanup complete".into(),
+    });
+}
+
+/// Move tasks that were mid-flight at shutdown back to a dispatchable status.
+///
+/// Claimed, Implementing and Reviewing tasks return to `Pending`; Integrating
+/// tasks return to `Approved`. Returns how many tasks were rewritten.
+fn reset_inflight_tasks(db: &redb::Database) -> Result<usize> {
+    use thrum_core::task::TaskStatus;
+    use thrum_db::task_store::TaskStore;
+
+    let store = TaskStore::new(db);
+    let mut touched = 0;
+
+    for mut task in store.list(None, None)? {
+        let target = match &task.status {
+            TaskStatus::Claimed { .. }
+            | TaskStatus::Implementing { .. }
+            | TaskStatus::Reviewing { .. } => TaskStatus::Pending,
+            TaskStatus::Integrating => TaskStatus::Approved,
+            // Every other status is left untouched.
+            _ => continue,
+        };
+
+        // Log before overwriting the status so `from` still shows the old label.
+        tracing::info!(
+            task_id = %task.id,
+            from = task.status.label(),
+            to = target.label(),
+            "shutdown: resetting in-flight task"
+        );
+        task.status = target;
+        task.updated_at = chrono::Utc::now();
+        store.update(&task)?;
+        touched += 1;
+    }
+
+    Ok(touched)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn process_tracker_new_is_empty() {
+        // Synchronous test: drive the async query on a throwaway runtime.
+        let runtime = tokio::runtime::Runtime::new().unwrap();
+        let tracker = ProcessTracker::new();
+        assert!(runtime.block_on(tracker.tracked_pids()).is_empty());
+    }
+
+    #[tokio::test]
+    async fn process_tracker_register_and_unregister() {
+        let tracker = ProcessTracker::new();
+        for pid in [12345, 67890] {
+            tracker.register(pid).await;
+        }
+        assert_eq!(tracker.tracked_pids().await.len(), 2);
+
+        // Dropping one PID must leave exactly the other behind.
+        tracker.unregister(12345).await;
+        let remaining = tracker.tracked_pids().await;
+        assert_eq!(remaining.len(), 1);
+        assert!(remaining.contains(&67890));
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn is_process_alive_current_process() {
+        // The test process itself is necessarily running.
+        assert!(is_process_alive(std::process::id()));
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn is_process_alive_nonexistent() {
+        // PID 99999999 almost certainly doesn't exist.
+        assert!(!is_process_alive(99_999_999));
+    }
+
+    #[test]
+    fn kill_orphaned_processes_does_not_panic() {
+        // Nothing to assert beyond "returned without panicking"; the result is discarded.
+        let _ = kill_orphaned_claude_processes();
+    }
+
+    #[test]
+    fn check_repo_dirty_nonexistent_path() {
+        // A path that doesn't exist must yield `None`.
+        let missing = Path::new("/nonexistent/repo");
+        assert!(check_repo_dirty(missing).is_none());
+    }
+
+    #[test]
+    fn cleanup_orphaned_worktrees_nonexistent_dir() {
+        let missing = Path::new("/nonexistent/worktrees");
+        assert_eq!(cleanup_orphaned_worktrees(missing, &[]), 0);
+    }
+
+    #[test]
+    fn cleanup_stale_sysprompt_does_not_panic() {
+        cleanup_stale_sysprompt_files();
+    }
+}
diff --git a/crates/thrum-runner/src/subprocess.rs b/crates/thrum-runner/src/subprocess.rs
index 60f71bf..e1d7e7e 100644
--- a/crates/thrum-runner/src/subprocess.rs
+++ b/crates/thrum-runner/src/subprocess.rs
@@ -1,4 +1,5 @@
 use crate::event_bus::EventBus;
+use crate::shutdown::ProcessTracker;
 use anyhow::{Context, Result};
 use std::path::Path;
 use std::time::Duration;
@@ -23,20 +24,77 @@ impl SubprocessOutput {
 
 /// Run a shell command with a timeout (non-streaming, original behavior).
 pub async fn run_cmd(cmd: &str, cwd: &Path, timeout: Duration) -> Result<SubprocessOutput> {
-    tracing::debug!(cmd, ?cwd, ?timeout, "spawning subprocess");
-
-    let child = Command::new("sh")
-        .arg("-c")
-        .arg(cmd)
-        .current_dir(cwd)
-        // Allow Claude CLI subprocess to run inside a parent Claude session.
-        .env_remove("CLAUDECODE")
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .context(format!("failed to spawn: {cmd}"))?;
-
-    match tokio::time::timeout(timeout, child.wait_with_output()).await {
+    run_cmd_with_sandbox(cmd, cwd, timeout, None).await
+}
+
+/// Run a shell command with optional macOS seatbelt sandbox isolation.
+///
+/// Thin wrapper over `run_cmd_with_sandbox_tracked` that passes no `ProcessTracker`.
+/// A `Some` profile wraps the command as `sandbox-exec -f <profile>` on macOS only.
+pub async fn run_cmd_with_sandbox(
+    cmd: &str,
+    cwd: &Path,
+    timeout: Duration,
+    sandbox_profile: Option<&Path>,
+) -> Result<SubprocessOutput> {
+    run_cmd_with_sandbox_tracked(cmd, cwd, timeout, sandbox_profile, None).await
+}
+
+/// Run a shell command with optional sandbox and process tracking.
+///
+/// When a `ProcessTracker` is provided, the child PID is registered before
+/// waiting and unregistered after the process exits. This enables the shutdown
+/// coordinator to send SIGTERM/SIGKILL to long-running agent processes.
+pub async fn run_cmd_with_sandbox_tracked(
+    cmd: &str,
+    cwd: &Path,
+    timeout: Duration,
+    sandbox_profile: Option<&Path>,
+    tracker: Option<&ProcessTracker>,
+) -> Result<SubprocessOutput> {
+    tracing::debug!(
+        cmd,
+        ?cwd,
+        ?timeout,
+        sandbox = sandbox_profile.is_some(),
+        "spawning subprocess"
+    );
+
+    let child = if let Some(profile) = sandbox_profile.filter(|_| cfg!(target_os = "macos")) {
+        tracing::info!(profile = %profile.display(), "sandboxing with seatbelt");
+        Command::new("sandbox-exec")
+            .arg("-f")
+            .arg(profile)
+            .arg("sh")
+            .arg("-c")
+            .arg(cmd)
+            .current_dir(cwd)
+            .env_remove("CLAUDECODE")
+            .env_remove("CLAUDE_CODE_ENTRYPOINT")
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context(format!("failed to spawn sandboxed: {cmd}"))?
+    } else {
+        Command::new("sh")
+            .arg("-c")
+            .arg(cmd)
+            .current_dir(cwd)
+            .env_remove("CLAUDECODE")
+            .env_remove("CLAUDE_CODE_ENTRYPOINT")
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context(format!("failed to spawn: {cmd}"))?
+    };
+
+    // Register the child PID with the process tracker for shutdown coordination.
+    let pid = child.id();
+    if let (Some(tracker), Some(pid)) = (tracker, pid) {
+        tracker.register(pid).await;
+    }
+
+    let result = match tokio::time::timeout(timeout, child.wait_with_output()).await {
         Ok(Ok(output)) => {
             let result = SubprocessOutput {
                 stdout: String::from_utf8_lossy(&output.stdout).to_string(),
@@ -61,7 +119,14 @@ pub async fn run_cmd(cmd: &str, cwd: &Path, timeout: Duration) -> Result<Subproc
                 timed_out: true,
             })
         }
+    };
+
+    // Unregister the PID — process has exited (or timed out).
+    if let (Some(tracker), Some(pid)) = (tracker, pid) {
+        tracker.unregister(pid).await;
     }
+
+    result
 }
 
 /// Callback for streaming subprocess output lines.
@@ -84,19 +149,77 @@ pub async fn run_cmd_streaming(
     timeout: Duration,
     event_bus: &EventBus,
     line_callback: LineCallback,
+    sandbox_profile: Option<&Path>,
 ) -> Result<SubprocessOutput> {
-    tracing::debug!(cmd, ?cwd, ?timeout, "spawning streaming subprocess");
-
-    let mut child = Command::new("sh")
-        .arg("-c")
-        .arg(cmd)
-        .current_dir(cwd)
-        // Allow Claude CLI subprocess to run inside a parent Claude session.
-        .env_remove("CLAUDECODE")
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .context(format!("failed to spawn: {cmd}"))?;
+    run_cmd_streaming_tracked(
+        cmd,
+        cwd,
+        timeout,
+        event_bus,
+        line_callback,
+        None,
+        sandbox_profile,
+    )
+    .await
+}
+
+/// Run a shell command with streaming output and process tracking.
+///
+/// Like `run_cmd_streaming`, but registers the child PID with the
+/// `ProcessTracker` for graceful shutdown support.
+///
+/// When `sandbox_profile` is `Some` on macOS, wraps the command with
+/// `sandbox-exec -f <profile>` for seatbelt isolation.
+pub async fn run_cmd_streaming_tracked(
+    cmd: &str,
+    cwd: &Path,
+    timeout: Duration,
+    event_bus: &EventBus,
+    line_callback: LineCallback,
+    tracker: Option<&ProcessTracker>,
+    sandbox_profile: Option<&Path>,
+) -> Result<SubprocessOutput> {
+    tracing::debug!(
+        cmd,
+        ?cwd,
+        ?timeout,
+        sandbox = sandbox_profile.is_some(),
+        "spawning streaming subprocess"
+    );
+
+    let mut child = if let Some(profile) = sandbox_profile.filter(|_| cfg!(target_os = "macos")) {
+        tracing::info!(profile = %profile.display(), "sandboxing streaming subprocess with seatbelt");
+        Command::new("sandbox-exec")
+            .arg("-f")
+            .arg(profile)
+            .arg("sh")
+            .arg("-c")
+            .arg(cmd)
+            .current_dir(cwd)
+            .env_remove("CLAUDECODE")
+            .env_remove("CLAUDE_CODE_ENTRYPOINT")
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context(format!("failed to spawn sandboxed: {cmd}"))?
+    } else {
+        Command::new("sh")
+            .arg("-c")
+            .arg(cmd)
+            .current_dir(cwd)
+            .env_remove("CLAUDECODE")
+            .env_remove("CLAUDE_CODE_ENTRYPOINT")
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context(format!("failed to spawn: {cmd}"))?
+    };
+
+    // Register the child PID with the process tracker for shutdown coordination.
+    let pid = child.id();
+    if let (Some(tracker), Some(pid)) = (tracker, pid) {
+        tracker.register(pid).await;
+    }
 
     let stdout = child.stdout.take().context("failed to capture stdout")?;
     let stderr = child.stderr.take().context("failed to capture stderr")?;
@@ -167,7 +290,7 @@ pub async fn run_cmd_streaming(
     // Use the EventBus reference to keep it alive (needed for the type system)
     let _ = event_bus;
 
-    match tokio::time::timeout(timeout, read_future).await {
+    let result = match tokio::time::timeout(timeout, read_future).await {
         Ok(Ok(status)) => {
             let result = SubprocessOutput {
                 stdout: stdout_buf,
@@ -195,7 +318,14 @@ pub async fn run_cmd_streaming(
                 timed_out: true,
             })
         }
+    };
+
+    // Unregister the PID — process has exited (or timed out).
+    if let (Some(tracker), Some(pid)) = (tracker, pid) {
+        tracker.unregister(pid).await;
     }
+
+    result
 }
 
 /// Run a command and return just stdout, failing on non-zero exit.
diff --git a/crates/thrum-runner/src/sync.rs b/crates/thrum-runner/src/sync.rs
new file mode 100644
index 0000000..6cf367d
--- /dev/null
+++ b/crates/thrum-runner/src/sync.rs
@@ -0,0 +1,682 @@
+//! Remote sync engine: fetch remote main, fast-forward local, rebase in-flight branches.
+//!
+//! This module orchestrates the "sync point" operation:
+//! 1. `git fetch origin main` to get the latest remote state.
+//! 2. Fast-forward (or rebase) local main to match remote.
+//! 3. Rebase all in-flight task branches onto the updated main.
+//! 4. Dispatch rebase agents for any branches with conflicts.
+//! 5. Emit events to the EventBus for real-time dashboard visibility.
+
+use anyhow::{Context, Result};
+use chrono::Utc;
+use std::path::Path;
+use std::process::Command;
+use thrum_core::event::EventKind;
+use thrum_core::sync::{
+    BranchRebaseResult, SyncConfig, SyncPointRecord, SyncStrategy, SyncTrigger,
+};
+use thrum_core::task::{RepoName, TaskId};
+use thrum_db::task_store::TaskStore;
+
+use crate::event_bus::EventBus;
+
+/// Tracks accumulated merges for batched sync strategy.
+///
+/// The derived `Default` is the empty state: zero pending merges and no
+/// recorded sync.
+#[derive(Debug, Default)]
+pub struct SyncState {
+    /// Number of merges since last sync.
+    pub pending_merges: u32,
+    /// Timestamp of last sync (if any).
+    pub last_sync: Option<chrono::DateTime<Utc>>,
+}
+
+impl SyncState {
+    /// Create an empty state (no pending merges, no previous sync).
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record that a PR merge happened.
+    pub fn record_merge(&mut self) {
+        self.pending_merges += 1;
+    }
+
+    /// Decide whether a sync is due under `config`.
+    ///
+    /// - Disabled config or `Manual` strategy: never.
+    /// - `Eager`: whenever at least one merge is pending.
+    /// - `Batched`: once `batch_count` merges are pending, or when a merge is
+    ///   pending and `interval_secs` have passed since the last recorded sync.
+    ///   With no sync recorded yet, only the count can trigger.
+    pub fn should_sync(&self, config: &SyncConfig) -> bool {
+        if !config.enabled {
+            return false;
+        }
+
+        let pending = self.pending_merges;
+        match &config.sync_strategy {
+            SyncStrategy::Manual => false,
+            SyncStrategy::Eager => pending > 0,
+            SyncStrategy::Batched {
+                batch_count,
+                interval_secs,
+            } => {
+                let interval_elapsed = || {
+                    self.last_sync.is_some_and(|last| {
+                        let elapsed = Utc::now().signed_duration_since(last);
+                        elapsed.num_seconds() >= *interval_secs as i64
+                    })
+                };
+                // Short-circuits: the clock is read only when the count alone is not enough.
+                pending >= *batch_count || (pending > 0 && interval_elapsed())
+            }
+        }
+    }
+
+    /// Clear pending state after a successful sync.
+    pub fn clear_pending(&mut self) {
+        self.pending_merges = 0;
+        self.last_sync = Some(Utc::now());
+    }
+}
+
+/// Build a `git` invocation rooted at `repo_path`.
+///
+/// Location overrides that can leak in from a parent git context (e.g.,
+/// pre-commit hooks, worktrees) are removed from the child's environment so
+/// the command cannot be pointed at the wrong repository.
+fn git_cmd(repo_path: &Path) -> Command {
+    let mut git = Command::new("git");
+    git.current_dir(repo_path)
+        .env_remove("GIT_DIR")
+        .env_remove("GIT_INDEX_FILE")
+        .env_remove("GIT_WORK_TREE")
+        .env_remove("GIT_OBJECT_DIRECTORY")
+        .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES");
+    git
+}
+
+/// Fetch the remote default branch from `origin`: `main` first, then `master`.
+/// Fails only when both fetches fail; the error carries git's stderr from both.
+pub fn fetch_remote_main(repo_path: &Path) -> Result<()> {
+    let main_fetch = git_cmd(repo_path)
+        .args(["fetch", "origin", "main"])
+        .output()
+        .context("failed to execute git fetch")?;
+    if main_fetch.status.success() {
+        return Ok(());
+    }
+    // Try master if main fails
+    let master_fetch = git_cmd(repo_path)
+        .args(["fetch", "origin", "master"])
+        .output()
+        .context("failed to execute git fetch for master")?;
+    if master_fetch.status.success() {
+        return Ok(());
+    }
+    let main_err = String::from_utf8_lossy(&main_fetch.stderr);
+    let master_err = String::from_utf8_lossy(&master_fetch.stderr);
+    anyhow::bail!("git fetch failed: main: {}; master: {}", main_err.trim(), master_err.trim())
+}
+
+/// Detect the default branch name (main or master).
+///
+/// Probes `refs/heads/main`, then `refs/heads/master`, with
+/// `git rev-parse --verify` and returns the first that resolves.
+///
+/// Errors if neither resolves or `git` cannot be run.
+fn detect_default_branch(repo_path: &Path) -> Result<String> {
+    for candidate in ["main", "master"] {
+        let probe = git_cmd(repo_path)
+            .args(["rev-parse", "--verify", &format!("refs/heads/{candidate}")])
+            .output()?;
+
+        // Any unsuccessful exit is treated as "not this one"; try the next name.
+        if probe.status.success() {
+            return Ok(candidate.into());
+        }
+    }
+
+    anyhow::bail!("no default branch found (tried main, master)")
+}
+
+/// Resolve the commit SHA that the local default branch points at.
+///
+/// Fails when the branch cannot be detected, `git` cannot run, or it prints no SHA.
+pub fn local_main_sha(repo_path: &Path) -> Result<String> {
+    let branch = detect_default_branch(repo_path)?;
+    let rev_parse = git_cmd(repo_path)
+        .args(["rev-parse", &format!("refs/heads/{branch}")])
+        .output()
+        .context("failed to get local main SHA")?;
+    match String::from_utf8_lossy(&rev_parse.stdout).trim() {
+        "" => anyhow::bail!("local main SHA is empty"),
+        sha => Ok(sha.to_string()),
+    }
+}
+
+/// Advance local main to `remote_sha`.
+///
+/// Returns `Ok(false)` if local main already equals `remote_sha`, `Ok(true)` if it moved.
+/// The ref is moved directly only for a true fast-forward; a diverged main is rebased.
+pub fn fast_forward_main(repo_path: &Path, remote_sha: &str) -> Result<bool> {
+    let branch = detect_default_branch(repo_path)?;
+    let local_sha = local_main_sha(repo_path)?;
+    if local_sha == remote_sha {
+        return Ok(false); // Already up to date
+    }
+    // `update-ref` accepts any commit; check ancestry so local-only commits are never dropped.
+    let is_fast_forward = git_cmd(repo_path)
+        .args(["merge-base", "--is-ancestor", &local_sha, remote_sha])
+        .output()
+        .context("failed to check ancestry for fast-forward")?
+        .status
+        .success();
+    if is_fast_forward {
+        let head_ref = format!("refs/heads/{branch}");
+        let output = git_cmd(repo_path)
+            .args(["update-ref", &head_ref, remote_sha, &local_sha])
+            .output()
+            .context("failed to update-ref for fast-forward")?;
+        if output.status.success() {
+            return Ok(true);
+        }
+    }
+    // Diverged history (or the ref update failed): replay local main onto the remote branch.
+    let output = git_cmd(repo_path)
+        .args(["rebase", &format!("origin/{branch}"), &branch])
+        .output()
+        .context("failed to rebase local main onto remote")?;
+    if !output.status.success() {
+        // Abort the rebase
+        let _ = git_cmd(repo_path).args(["rebase", "--abort"]).output();
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("failed to fast-forward local main: {stderr}");
+    }
+    Ok(true)
+}
+
+/// Rebase a single branch onto the updated default branch.
+/// Never fails outright: every outcome, spawn errors included, is reported in the result.
+pub fn rebase_branch(
+    repo_path: &Path,
+    branch: &str,
+    task_id: Option<TaskId>,
+) -> BranchRebaseResult {
+    let default_branch = match detect_default_branch(repo_path) {
+        Ok(b) => b,
+        Err(e) => {
+            return BranchRebaseResult {
+                branch: branch.into(),
+                task_id,
+                success: false,
+                had_conflicts: false,
+                agent_dispatched: false,
+                new_head_sha: None,
+                error: Some(format!("failed to detect default branch: {e}")),
+            };
+        }
+    };
+
+    let output = git_cmd(repo_path)
+        .args(["rebase", &default_branch, branch])
+        .output();
+
+    match output {
+        Ok(out) if out.status.success() => {
+            // Get the new HEAD SHA for this branch
+            let new_sha = git_cmd(repo_path)
+                .args(["rev-parse", &format!("refs/heads/{branch}")])
+                .output()
+                .ok()
+                .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
+                .filter(|s| !s.is_empty());
+
+            BranchRebaseResult {
+                branch: branch.into(),
+                task_id,
+                success: true,
+                had_conflicts: false,
+                agent_dispatched: false,
+                new_head_sha: new_sha,
+                error: None,
+            }
+        }
+        Ok(out) => {
+            // Rebase failed. Git reports `CONFLICT (...)` on stdout, so scan both streams.
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            let stderr = String::from_utf8_lossy(&out.stderr);
+            let mentions_conflict = |s: &str| s.contains("CONFLICT") || s.contains("conflict");
+            let had_conflicts = mentions_conflict(&stdout) || mentions_conflict(&stderr);
+            // Abort the rebase
+            let _ = git_cmd(repo_path).args(["rebase", "--abort"]).output();
+
+            BranchRebaseResult {
+                branch: branch.into(),
+                task_id,
+                success: false,
+                had_conflicts,
+                agent_dispatched: false,
+                new_head_sha: None,
+                error: Some(stderr.to_string()),
+            }
+        }
+        Err(e) => BranchRebaseResult {
+            branch: branch.into(),
+            task_id,
+            success: false,
+            had_conflicts: false,
+            agent_dispatched: false,
+            new_head_sha: None,
+            error: Some(format!("failed to execute rebase: {e}")),
+        },
+    }
+}
+
+/// Collect `(branch name, task id)` for every task of `repo` in an active status.
+///
+/// Statuses are queried one at a time, in the order listed below; a status whose
+/// lookup fails contributes nothing rather than aborting the scan.
+pub fn in_flight_branches(task_store: &TaskStore, repo: &RepoName) -> Vec<(String, TaskId)> {
+    let active_statuses = [
+        "implementing",
+        "reviewing",
+        "awaiting-approval",
+        "approved",
+        "integrating",
+    ];
+
+    active_statuses
+        .iter()
+        // Lookup errors are dropped here, matching a best-effort scan.
+        .filter_map(|status| task_store.list(Some(status), Some(repo)).ok())
+        .flatten()
+        .map(|task| (task.branch_name(), task.id))
+        .collect()
+}
+
+/// Execute a full sync operation for a repository.
+///
+/// Emits `SyncStarted`, fetches the remote, moves local main and, when
+/// `config.auto_rebase` is set, rebases every in-flight branch (one
+/// `BranchRebased` event each). On success emits `SyncCompleted` and returns
+/// the sync record.
+///
+/// # Errors
+///
+/// Any failure before the rebase phase emits `SyncFailed` and is returned, so a
+/// started sync always ends with a terminal event. Per-branch rebase failures
+/// are recorded in the returned record instead of failing the sync.
+pub fn execute_sync(
+    repo_path: &Path,
+    repo: &RepoName,
+    task_store: &TaskStore,
+    event_bus: &EventBus,
+    config: &SyncConfig,
+    trigger: SyncTrigger,
+) -> Result<SyncPointRecord> {
+    // Emit start event
+    event_bus.emit(EventKind::SyncStarted {
+        repo: repo.clone(),
+        trigger: trigger.clone(),
+    });
+
+    // Every early exit below goes through this helper so that `SyncFailed`
+    // is emitted exactly once for a sync that cannot complete.
+    let fail = |error_msg: String| -> anyhow::Error {
+        event_bus.emit(EventKind::SyncFailed {
+            repo: repo.clone(),
+            error: error_msg.clone(),
+            trigger: trigger.clone(),
+        });
+        anyhow::anyhow!(error_msg)
+    };
+
+    let previous_sha = local_main_sha(repo_path)
+        .map_err(|e| fail(format!("failed to read local main: {e:#}")))?;
+
+    // Step 1: Fetch remote
+    fetch_remote_main(repo_path).map_err(|e| fail(format!("fetch failed: {e}")))?;
+
+    // Step 2: Get remote SHA
+    let branch = detect_default_branch(repo_path)
+        .map_err(|e| fail(format!("failed to detect default branch: {e:#}")))?;
+    let remote_sha_output = git_cmd(repo_path)
+        .args(["rev-parse", &format!("origin/{branch}")])
+        .output()
+        .map_err(|e| fail(format!("failed to get remote SHA: {e}")))?;
+
+    let remote_sha = String::from_utf8_lossy(&remote_sha_output.stdout)
+        .trim()
+        .to_string();
+    if remote_sha.is_empty() {
+        return Err(fail("remote SHA is empty after fetch".to_string()));
+    }
+
+    // Step 3: Fast-forward local main
+    let fast_forward = fast_forward_main(repo_path, &remote_sha)
+        .map_err(|e| fail(format!("fast-forward failed: {e}")))?;
+
+    // Step 4: Rebase in-flight branches
+    let mut branch_results = Vec::new();
+    let mut branches_rebased = 0u32;
+    let mut branches_conflicted = 0u32;
+
+    if config.auto_rebase {
+        let branches = in_flight_branches(task_store, repo);
+        for (branch_name, task_id) in branches {
+            let mut result = rebase_branch(repo_path, &branch_name, Some(task_id.clone()));
+
+            // Emit per-branch event
+            event_bus.emit(EventKind::BranchRebased {
+                repo: repo.clone(),
+                branch: branch_name.clone(),
+                task_id: Some(task_id.clone()),
+                success: result.success,
+                had_conflicts: result.had_conflicts,
+            });
+
+            if result.success {
+                branches_rebased += 1;
+            }
+            if result.had_conflicts {
+                branches_conflicted += 1;
+
+                // Dispatch rebase agent if configured
+                if config.dispatch_rebase_agent {
+                    result.agent_dispatched = true;
+                    event_bus.emit(EventKind::RebaseAgentDispatched {
+                        repo: repo.clone(),
+                        branch: branch_name.clone(),
+                        task_id: Some(task_id),
+                    });
+                }
+            }
+
+            branch_results.push(result);
+        }
+    }
+
+    // Emit completion event
+    event_bus.emit(EventKind::SyncCompleted {
+        repo: repo.clone(),
+        remote_sha: remote_sha.clone(),
+        branches_rebased,
+        branches_conflicted,
+        trigger: trigger.clone(),
+    });
+
+    let record = SyncPointRecord {
+        id: format!("sync-{}", Utc::now().timestamp_millis()),
+        repo: repo.clone(),
+        remote_sha,
+        previous_local_sha: previous_sha,
+        fast_forward,
+        branch_results,
+        branches_rebased,
+        branches_conflicted,
+        synced_at: Utc::now(),
+        trigger,
+    };
+
+    Ok(record)
+}
+
+/// Update worktree branch tracking refs after a sync.
+///
+/// For each directory directly under `worktrees_dir`, runs
+/// `git update-ref refs/heads/<default> <local main SHA>` from inside it.
+/// Individual update failures are ignored. A missing `worktrees_dir` is a no-op.
+/// NOTE(review): directories are not filtered by repository — confirm that
+/// `worktrees_dir` only ever holds worktrees of `repo_path`.
+pub fn update_worktree_bases(
+    repo_path: &Path,
+    worktrees_dir: &Path,
+    _repo: &RepoName,
+    _event_bus: &EventBus,
+) -> Result<()> {
+    if !worktrees_dir.exists() {
+        return Ok(());
+    }
+
+    let branch = detect_default_branch(repo_path)?;
+    let new_sha = local_main_sha(repo_path)?;
+    let main_ref = format!("refs/heads/{branch}");
+
+    for dir_entry in std::fs::read_dir(worktrees_dir)? {
+        let dir_entry = dir_entry?;
+        if dir_entry.file_type()?.is_dir() {
+            // Best effort: the result of each per-worktree update is discarded.
+            let _ = git_cmd(&dir_entry.path())
+                .args(["update-ref", &main_ref, &new_sha])
+                .output();
+        }
+    }
+
+    Ok(())
+}
+
+/// Run a sync on demand for one repository (called from API endpoint).
+///
+/// Always runs with `SyncConfig::default()` and tags the run as
+/// `SyncTrigger::Manual`; the work itself is done by `execute_sync`.
+pub fn trigger_manual_sync(
+    repo_path: &Path,
+    repo: &RepoName,
+    task_store: &TaskStore,
+    event_bus: &EventBus,
+) -> Result<SyncPointRecord> {
+    // NOTE(review): the default config is used here rather than any loaded
+    // settings — confirm that is intended for manually triggered syncs.
+    let defaults = SyncConfig::default();
+    let trigger = SyncTrigger::Manual;
+
+    execute_sync(repo_path, repo, task_store, event_bus, &defaults, trigger)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::process::Command as StdCommand;
+
+    fn git_in(dir: &Path, args: &[&str]) {
+        StdCommand::new("git")
+            .args(args)
+            .current_dir(dir)
+            .env_remove("GIT_DIR")
+            .env_remove("GIT_INDEX_FILE")
+            .env_remove("GIT_WORK_TREE")
+            .output()
+            .unwrap();
+    }
+
+    fn init_test_repo() -> tempfile::TempDir {
+        let dir = tempfile::tempdir().unwrap();
+        let p = dir.path();
+        git_in(p, &["init", "-b", "main"]);
+        git_in(p, &["config", "user.email", "test@test.com"]);
+        git_in(p, &["config", "user.name", "Test"]);
+        git_in(p, &["config", "commit.gpgsign", "false"]);
+        std::fs::write(p.join("initial.txt"), "hello").unwrap();
+        git_in(p, &["add", "."]);
+        git_in(p, &["commit", "-m", "initial"]);
+        dir
+    }
+
+    #[test]
+    fn sync_state_record_merge() {
+        let mut state = SyncState::new();
+        assert_eq!(state.pending_merges, 0);
+        state.record_merge();
+        assert_eq!(state.pending_merges, 1);
+        state.record_merge();
+        assert_eq!(state.pending_merges, 2);
+    }
+
+    #[test]
+    fn sync_state_should_sync_eager() {
+        let mut state = SyncState::new();
+        let config = SyncConfig {
+            enabled: true,
+            sync_strategy: SyncStrategy::Eager,
+            auto_rebase: true,
+            dispatch_rebase_agent: true,
+        };
+        assert!(!state.should_sync(&config));
+        state.record_merge();
+        assert!(state.should_sync(&config));
+    }
+
+    #[test]
+    fn sync_state_should_sync_manual() {
+        let mut state = SyncState::new();
+        let config = SyncConfig {
+            enabled: true,
+            sync_strategy: SyncStrategy::Manual,
+            auto_rebase: true,
+            dispatch_rebase_agent: true,
+        };
+        state.record_merge();
+        assert!(!state.should_sync(&config));
+    }
+
+    #[test]
+    fn sync_state_should_sync_batched() {
+        let mut state = SyncState::new();
+        let config = SyncConfig {
+            enabled: true,
+            sync_strategy: SyncStrategy::Batched {
+                batch_count: 3,
+                interval_secs: 300,
+            },
+            auto_rebase: true,
+            dispatch_rebase_agent: true,
+        };
+
+        state.record_merge();
+        assert!(!state.should_sync(&config)); // 1 < 3
+
+        state.record_merge();
+        assert!(!state.should_sync(&config)); // 2 < 3
+
+        state.record_merge();
+        assert!(state.should_sync(&config)); // 3 >= 3
+    }
+
+    #[test]
+    fn sync_state_clear_pending() {
+        let mut tracker = SyncState::new();
+        tracker.record_merge();
+        tracker.record_merge();
+        assert_eq!(tracker.pending_merges, 2);
+        tracker.clear_pending(); // expected to zero the counter and set `last_sync`
+        assert!(tracker.last_sync.is_some());
+        assert_eq!(tracker.pending_merges, 0);
+    }
+
+    #[test]
+    fn sync_state_disabled_never_triggers() {
+        let disabled = SyncConfig {
+            sync_strategy: SyncStrategy::Eager,
+            enabled: false,
+            dispatch_rebase_agent: true,
+            auto_rebase: true,
+        };
+        let mut tracker = SyncState::new();
+        tracker.record_merge();
+        assert!(!tracker.should_sync(&disabled)); // `enabled: false` wins over Eager
+    }
+
+    #[test]
+    fn detect_default_branch_finds_main() {
+        let fixture = init_test_repo();
+        let detected = detect_default_branch(fixture.path());
+        assert_eq!(detected.unwrap(), "main");
+    }
+
+    #[test]
+    fn local_main_sha_returns_sha() {
+        let fixture = init_test_repo();
+        let head = local_main_sha(fixture.path()).unwrap();
+        assert!(!head.is_empty());
+        assert!(head.len() >= 7); // at least as long as an abbreviated hash
+    }
+
+    #[test]
+    fn fast_forward_main_noop_when_same_sha() {
+        let fixture = init_test_repo();
+        let repo = fixture.path();
+        let current = local_main_sha(repo).unwrap();
+        assert!(!fast_forward_main(repo, &current).unwrap());
+    }
+
+    #[test]
+    fn rebase_branch_nonexistent_branch() {
+        let fixture = init_test_repo();
+        let outcome = rebase_branch(fixture.path(), "nonexistent-branch", None);
+        assert!(!outcome.success);
+        assert!(outcome.error.is_some()); // the failure must be reported
+    }
+
+    #[test]
+    fn rebase_branch_no_changes_needed() {
+        let fixture = init_test_repo();
+        let repo = fixture.path();
+
+        // `feature-a` is branched off the current HEAD without any new commits.
+        git_in(repo, &["branch", "feature-a"]);
+
+        let outcome = rebase_branch(repo, "feature-a", Some(TaskId(1)));
+        assert!(outcome.success);
+        assert!(!outcome.had_conflicts);
+    }
+
+    #[test]
+    fn rebase_branch_with_diverged_commits() {
+        let fixture = init_test_repo();
+        let repo = fixture.path();
+
+        // `feature-b` receives one commit that adds a new file.
+        git_in(repo, &["checkout", "-b", "feature-b"]);
+        std::fs::write(repo.join("feature.txt"), "feature work").unwrap();
+        git_in(repo, &["add", "."]);
+        git_in(repo, &["commit", "-m", "feature commit"]);
+
+        // `main` then receives a commit that touches a different file.
+        git_in(repo, &["checkout", "main"]);
+        std::fs::write(repo.join("main-update.txt"), "main update").unwrap();
+        git_in(repo, &["add", "."]);
+        git_in(repo, &["commit", "-m", "main update"]);
+
+        // The two sides touch disjoint files, so the rebase should apply cleanly.
+        let outcome = rebase_branch(repo, "feature-b", Some(TaskId(2)));
+        assert!(outcome.success);
+        assert!(!outcome.had_conflicts);
+        assert!(outcome.new_head_sha.is_some());
+    }
+
+    #[test]
+    fn rebase_branch_with_conflicts() {
+        let fixture = init_test_repo();
+        let repo = fixture.path();
+
+        // `feature-c` rewrites initial.txt ...
+        git_in(repo, &["checkout", "-b", "feature-c"]);
+        std::fs::write(repo.join("initial.txt"), "feature version").unwrap();
+        git_in(repo, &["add", "."]);
+        git_in(repo, &["commit", "-m", "feature change"]);
+
+        // ... and `main` rewrites the same file with different content.
+        git_in(repo, &["checkout", "main"]);
+        std::fs::write(repo.join("initial.txt"), "main version").unwrap();
+        git_in(repo, &["add", "."]);
+        git_in(repo, &["commit", "-m", "main change"]);
+
+        // Both sides changed the same file, so a conflict must be reported.
+        let outcome = rebase_branch(repo, "feature-c", Some(TaskId(3)));
+        assert!(!outcome.success);
+        assert!(outcome.had_conflicts);
+    }
+}
diff --git a/crates/thrum-runner/src/worktree.rs b/crates/thrum-runner/src/worktree.rs
index fda2192..5af3503 100644
--- a/crates/thrum-runner/src/worktree.rs
+++ b/crates/thrum-runner/src/worktree.rs
@@ -20,6 +20,8 @@ impl Worktree {
     /// Create a new worktree for the given branch.
     ///
     /// Runs `git worktree add <base_dir>/<branch_slug> <branch>`.
+    /// If a stale worktree already exists at the target path, it is
+    /// cleaned up automatically before re-creating.
     pub fn create(repo_path: &Path, branch: &str, base_dir: &Path) -> Result<Self> {
         let slug: String = branch
             .chars()
@@ -35,8 +37,55 @@ impl Worktree {
 
         std::fs::create_dir_all(base_dir).context("failed to create worktree base directory")?;
 
+        // If a stale worktree exists from a previous crash, clean it up first.
+        if worktree_path.exists() {
+            tracing::warn!(
+                worktree = %worktree_path.display(),
+                branch,
+                "stale worktree directory found — cleaning up before re-creating"
+            );
+            // Try git worktree remove first (handles git metadata cleanly).
+            let _ = Command::new("git")
+                .args([
+                    "worktree",
+                    "remove",
+                    "--force",
+                    worktree_path.to_str().unwrap(),
+                ])
+                .current_dir(repo_path)
+                .env_remove("GIT_DIR")
+                .env_remove("GIT_INDEX_FILE")
+                .env_remove("GIT_WORK_TREE")
+                .output();
+
+            // Prune any dangling worktree metadata.
+            let _ = Command::new("git")
+                .args(["worktree", "prune"])
+                .current_dir(repo_path)
+                .env_remove("GIT_DIR")
+                .env_remove("GIT_INDEX_FILE")
+                .env_remove("GIT_WORK_TREE")
+                .output();
+
+            // If the directory still exists (broken state), force-remove it.
+            if worktree_path.exists() {
+                std::fs::remove_dir_all(&worktree_path)
+                    .context("failed to remove stale worktree directory")?;
+                tracing::info!(
+                    worktree = %worktree_path.display(),
+                    "force-removed stale worktree directory"
+                );
+            }
+        }
+
         let output = Command::new("git")
-            .args(["worktree", "add", worktree_path.to_str().unwrap(), branch])
+            .args([
+                "worktree",
+                "add",
+                "--force",
+                worktree_path.to_str().unwrap(),
+                branch,
+            ])
             .current_dir(repo_path)
             .env_remove("GIT_DIR")
             .env_remove("GIT_INDEX_FILE")
@@ -55,6 +104,11 @@ impl Worktree {
             "created git worktree"
         );
 
+        // Install a pre-commit hook that runs cargo fmt + clippy.
+        // This catches formatting and lint errors at commit time instead of
+        // wasting a full gate cycle to discover them.
+        install_precommit_hook(&worktree_path);
+
         Ok(Self {
             path: worktree_path,
             repo_path: repo_path.to_path_buf(),
@@ -83,6 +137,73 @@ impl Worktree {
     }
 }
 
+/// Install a pre-commit hook in a worktree that runs cargo fmt --check and clippy.
+///
+/// Git resolves `hooks/` against the shared git directory, so a script that only
+/// sits in a linked worktree's private gitdir never runs. The script is written
+/// there and the directory is registered as a worktree-scoped `core.hooksPath`.
+fn install_precommit_hook(worktree_path: &Path) {
+    let hook_script = r#"#!/bin/sh
+# Pre-commit hook installed by thrum — catches fmt/clippy before gate checks.
+# Runs cargo fmt --check and cargo clippy to fail fast on obvious issues.
+
+if ! cargo fmt -- --check >/dev/null 2>&1; then
+    echo "pre-commit: cargo fmt --check failed. Run 'cargo fmt' to fix." >&2
+    exit 1
+fi
+
+if ! cargo clippy --quiet --workspace --tests -- -D warnings; then
+    echo "pre-commit: cargo clippy failed. Fix warnings before committing." >&2
+    exit 1
+fi
+"#;
+
+    // A linked worktree's `.git` is a `gitdir: <path>` file; `join` also handles relative paths.
+    let git_file = worktree_path.join(".git");
+    let linked = git_file.is_file();
+    let hooks_dir = if linked {
+        std::fs::read_to_string(&git_file).ok().and_then(|content| {
+            content
+                .strip_prefix("gitdir: ")
+                .map(|p| worktree_path.join(p.trim()).join("hooks"))
+        })
+    } else if git_file.is_dir() {
+        Some(git_file.join("hooks"))
+    } else {
+        None
+    };
+    if let Some(hooks_dir) = hooks_dir {
+        let hook = hooks_dir.join("pre-commit");
+        let written = std::fs::create_dir_all(&hooks_dir)
+            .and_then(|()| std::fs::write(&hook, hook_script));
+        if let Err(e) = written {
+            tracing::warn!(error = %e, "failed to write pre-commit hook");
+            return;
+        }
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            let mode = std::fs::Permissions::from_mode(0o755);
+            if let Err(e) = std::fs::set_permissions(&hook, mode) {
+                tracing::warn!(error = %e, "failed to make pre-commit hook executable");
+            }
+        }
+        // `config --worktree` requires `extensions.worktreeConfig` with several worktrees.
+        let git_config = |args: &[&str], value: &std::ffi::OsStr| {
+            let mut cmd = Command::new("git");
+            cmd.arg("config").args(args).arg(value);
+            for var in ["GIT_DIR", "GIT_INDEX_FILE", "GIT_WORK_TREE"] {
+                cmd.env_remove(var);
+            }
+            matches!(cmd.current_dir(worktree_path).output(), Ok(out) if out.status.success())
+        };
+        let active = !linked
+            || (git_config(&["extensions.worktreeConfig"], std::ffi::OsStr::new("true"))
+                && git_config(&["--worktree", "core.hooksPath"], hooks_dir.as_os_str()));
+        tracing::info!(hook = %hook.display(), active, "installed pre-commit hook (fmt + clippy)");
+    }
+}
+
 impl Drop for Worktree {
     fn drop(&mut self) {
         if self.path.exists()
@@ -178,4 +299,22 @@ mod tests {
             .collect();
         assert_eq!(slug, "auto_TASK-42_foo_bar");
     }
+
+    #[test]
+    fn create_recovers_from_stale_worktree() {
+        let repo = init_test_repo();
+        let base_dir = tempfile::tempdir().unwrap();
+
+        // First creation; remember where the worktree was placed.
+        let first = Worktree::create(repo.path(), "test-branch", base_dir.path()).unwrap();
+        let stale_path = first.path.clone();
+        assert!(stale_path.exists());
+        // Skip `Drop` so the directory is left behind, as after an engine crash.
+        std::mem::forget(first);
+
+        // A second creation for the same branch must succeed and reuse the path.
+        let second = Worktree::create(repo.path(), "test-branch", base_dir.path()).unwrap();
+        assert!(second.path.exists());
+        assert_eq!(second.path, stale_path);
+    }
 }
diff --git a/examples/minimal/pipeline.toml b/examples/minimal/pipeline.toml
index de3faad..f347a65 100644
--- a/examples/minimal/pipeline.toml
+++ b/examples/minimal/pipeline.toml
@@ -61,18 +61,21 @@ backend = "opus"
 prompt_template = "agents/implementer.md"
 budget_usd = 6.0
 timeout_secs = 600
+timeout_recovery = "retry"    # Resume from checkpoint on timeout
 
 [roles.reviewer]
 backend = "sonnet"
 prompt_template = "agents/reviewer.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "skip"     # Auto-approve with "review-skipped-timeout" note
 
 [roles.planner]
 backend = "opus"
 prompt_template = "agents/planner.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "fail"     # Planning timeout = real failure
 
 [sandbox]
 backend = "none"
diff --git a/examples/minimal/repos.toml b/examples/minimal/repos.toml
index f5b625d..155c618 100644
--- a/examples/minimal/repos.toml
+++ b/examples/minimal/repos.toml
@@ -11,3 +11,29 @@ test_cmd = "cargo test"
 lint_cmd = "cargo clippy -- -D warnings"
 fmt_cmd = "cargo fmt -- --check"
 # claude_md = "/path/to/my-project/CLAUDE.md"
+
+# Gate 1 checks (opt-in per check). Default: ["cargo_fmt", "cargo_clippy", "cargo_test"]
+# Available: cargo_fmt, cargo_clippy, cargo_test, cargo_audit, cargo_deny, cargo_mutants
+# checks = ["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_audit", "cargo_deny"]
+
+# Mutation testing configuration (only used when "cargo_mutants" is in checks).
+# [repo.mutants]
+# changed_files_only = true    # Only test mutations in changed files
+# max_survival_rate = 20.0     # Warn if > 20% of mutations survive
+# timeout_secs = 60            # Timeout per mutant
+
+# CI integration (opt-in). Uncomment to enable push + PR creation after
+# local integration passes. When omitted, behavior is local merge only.
+# [repo.ci]
+# enabled = true
+# poll_interval_secs = 60
+# max_ci_retries = 3
+# auto_merge = true
+# merge_strategy = "squash"
+
+# Trust boundaries (opt-in). Classify files by risk level to control
+# auto-approval and trigger extra security checks.
+# [repo.trust]
+# high_risk = ["src/crypto/**", "src/auth/**", "Cargo.lock"]
+# security_sensitive = ["Cargo.toml", "build.rs", ".github/**"]
+# auto_ok = ["docs/**", "*.md", "tests/**"]
diff --git a/examples/pulseengine/pipeline.toml b/examples/pulseengine/pipeline.toml
index bb9f9a7..e191c26 100644
--- a/examples/pulseengine/pipeline.toml
+++ b/examples/pulseengine/pipeline.toml
@@ -82,18 +82,21 @@ backend = "opus"
 prompt_template = "agents/implementer.md"
 budget_usd = 6.0
 timeout_secs = 600
+timeout_recovery = "retry"    # Resume from checkpoint on timeout
 
 [roles.reviewer]
 backend = "sonnet"
 prompt_template = "agents/reviewer.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "skip"     # Auto-approve with "review-skipped-timeout" note
 
 [roles.planner]
 backend = "opus"
 prompt_template = "agents/planner.md"
 budget_usd = 1.0
 timeout_secs = 300
+timeout_recovery = "fail"     # Planning timeout = real failure
 
 [sandbox]
 backend = "none"
diff --git a/examples/pulseengine/repos.toml b/examples/pulseengine/repos.toml
index a700e3c..f4cb8e3 100644
--- a/examples/pulseengine/repos.toml
+++ b/examples/pulseengine/repos.toml
@@ -19,6 +19,32 @@ verify_cmd = "LIBRARY_PATH=/opt/homebrew/lib cargo test --release -- z3"
 proofs_cmd = "bazel build //proofs:all_proofs"
 claude_md = "/Users/r/git/loom/CLAUDE.md"
 safety_target = "AsilB"
+# Expanded gate checks: security audit + policy enforcement + mutation testing.
+checks = ["cargo_fmt", "cargo_clippy", "cargo_test", "cargo_audit", "cargo_deny", "cargo_mutants"]
+
+# Mutation testing configuration (only used when "cargo_mutants" is in checks).
+[repo.mutants]
+changed_files_only = true  # only test mutations in changed files
+max_survival_rate = 15.0   # percent; warn if more mutations than this survive
+timeout_secs = 120         # timeout per mutant, in seconds
+
+# CI integration: push branch + create PR after local Gate 3 passes.
+# Thrum will poll CI status and auto-merge on green, or dispatch a
+# ci_fixer agent on failure (up to max_ci_retries).
+[repo.ci]
+enabled = true
+poll_interval_secs = 60
+max_ci_retries = 3
+auto_merge = true
+merge_strategy = "squash"
+
+# Trust boundaries: classify files by risk to control approval flow.
+# high_risk files CANNOT be auto-approved; security_sensitive triggers
+# cargo-audit / cargo-deny; auto_ok files can be fast-path approved.
+[repo.trust]
+high_risk = ["cranelift/codegen/src/isle/**", "cranelift/codegen/src/isa/**"]
+security_sensitive = ["Cargo.toml", "Cargo.lock", "build.rs", ".github/**"]
+auto_ok = ["docs/**", "*.md", "tests/**"]
 
 [[repo]]
 name = "Meld"
@@ -30,6 +56,11 @@ fmt_cmd = "cargo fmt -- --check"
 proofs_cmd = "bazel build //proofs:all_proofs"
 claude_md = "/Users/r/git/unkown-project/CLAUDE.md"
 
+[repo.trust]
+high_risk = ["src/fuser/**", "src/linker/**"]
+security_sensitive = ["Cargo.toml", "Cargo.lock"]
+auto_ok = ["docs/**", "*.md"]
+
 [[repo]]
 name = "Synth"
 path = "/Users/r/git/Synth"
@@ -40,3 +71,8 @@ fmt_cmd = "cargo fmt -- --check"
 verify_cmd = "cargo test --release -- z3"
 claude_md = "/Users/r/git/Synth/CLAUDE.md"
 safety_target = "AsilD"
+
+[repo.trust]
+high_risk = ["src/codegen/**", "src/safety/**"]
+security_sensitive = ["Cargo.toml", "Cargo.lock", "build.rs"]
+auto_ok = ["docs/**", "*.md", "benches/**"]