diff --git a/.gitignore b/.gitignore index 8e2474c..34d6b03 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.redb .claude/ traces/ +worktrees/ diff --git a/Cargo.lock b/Cargo.lock index f76186f..e3795da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,31 +88,6 @@ version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" -[[package]] -name = "async-openai" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d126927c78e1562d7e8473008ac8b082318c04d69e3a83e3495a563f8b84a66" -dependencies = [ - "backoff", - "base64", - "bytes", - "derive_builder", - "eventsource-stream", - "futures", - "rand 0.8.5", - "reqwest", - "reqwest-eventsource", - "secrecy", - "serde", - "serde_json", - "thiserror 2.0.18", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", -] - [[package]] name = "async-trait" version = "0.1.89" @@ -143,6 +118,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", + "base64", "bytes", "form_urlencoded", "futures-util", @@ -161,8 +137,10 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", + "sha1", "sync_wrapper", "tokio", + "tokio-tungstenite 0.28.0", "tower 0.5.3", "tower-layer", "tower-service", @@ -188,20 +166,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "backoff" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" -dependencies = [ - "futures-core", - "getrandom 0.2.17", - "instant", - "pin-project-lite", - "rand 0.8.5", - "tokio", -] - [[package]] name = "base64" version = "0.22.1" @@ -235,6 +199,15 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bollard" version = "0.18.1" @@ -260,7 +233,7 @@ dependencies = [ "serde_json", "serde_repr", "serde_urlencoded", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-util", "tower-service", @@ -340,12 +313,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.43" @@ -457,22 +424,21 @@ dependencies = [ "libc", ] -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "criterion" version = "0.5.1" @@ -566,13 +532,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] -name = "darling" -version = "0.20.11" +name = "crypto-common" +version = 
"0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ - "darling_core 0.20.11", - "darling_macro 0.20.11", + "generic-array", + "typenum", ] [[package]] @@ -581,22 +547,8 @@ version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ - "darling_core 0.23.0", - "darling_macro 0.23.0", -] - -[[package]] -name = "darling_core" -version = "0.20.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", + "darling_core", + "darling_macro", ] [[package]] @@ -614,25 +566,20 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.11" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "darling_core 0.20.11", + "darling_core", "quote", "syn", ] [[package]] -name = "darling_macro" -version = "0.23.0" +name = "data-encoding" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core 0.23.0", - "quote", - "syn", -] +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "deranged" @@ -645,34 +592,13 @@ dependencies = [ ] [[package]] -name = "derive_builder" -version = "0.20.2" +name = "digest" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" -dependencies = [ - "darling 0.20.11", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_builder_macro" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" -dependencies = [ - "derive_builder_core", - "syn", + "block-buffer", + "crypto-common", ] [[package]] @@ -723,17 +649,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "eventsource-stream" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" -dependencies = [ - "futures-core", - "nom", - "pin-project-lite", -] - [[package]] name = "fastrand" version = "2.3.0" @@ -791,21 +706,6 @@ dependencies = [ "libc", ] -[[package]] -name = "futures" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -862,19 +762,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" -[[package]] -name = "futures-timer" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" - [[package]] name = "futures-util" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ - "futures-channel", "futures-core", "futures-io", "futures-macro", @@ -901,6 +794,16 @@ dependencies = [ "windows-result", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -908,10 +811,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", - "wasm-bindgen", ] [[package]] @@ -921,11 +822,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", - "js-sys", "libc", "r-efi", "wasip2", - "wasm-bindgen", ] [[package]] @@ -938,7 +837,7 @@ dependencies = [ "libc", "libgit2-sys", "log", - "openssl-probe 0.1.6", + "openssl-probe", "openssl-sys", "url", ] @@ -1113,7 +1012,6 @@ dependencies = [ "hyper", "hyper-util", "rustls", - "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -1379,22 +1277,13 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357b7205c6cd18dd2c86ed312d1e70add149aea98e7ef72b9fdf0270e555c11d" dependencies = [ - "darling 0.23.0", + "darling", "indoc", "proc-macro2", "quote", "syn", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - 
"cfg-if", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -1608,12 +1497,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "lru-slab" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" - [[package]] name = "matchers" version = "0.2.0" @@ -1641,22 +1524,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" -dependencies = [ - "mime", - "unicase", -] - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "mio" version = "1.1.1" @@ -1678,24 +1545,14 @@ dependencies = [ "libc", "log", "openssl", - "openssl-probe 0.1.6", + "openssl-probe", "openssl-sys", "schannel", - "security-framework 2.11.1", + "security-framework", "security-framework-sys", "tempfile", ] -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "notify" version = "8.2.0" @@ -1809,12 +1666,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" -[[package]] -name = "openssl-probe" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" - [[package]] name = "openssl-sys" version 
= "0.9.111" @@ -1837,7 +1688,7 @@ dependencies = [ "futures-sink", "js-sys", "pin-project-lite", - "thiserror 2.0.18", + "thiserror", "tracing", ] @@ -1869,7 +1720,7 @@ dependencies = [ "opentelemetry_sdk", "prost", "reqwest", - "thiserror 2.0.18", + "thiserror", "tokio", "tonic", "tracing", @@ -1901,7 +1752,7 @@ dependencies = [ "percent-encoding", "rand 0.9.2", "serde_json", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "tracing", @@ -2089,61 +1940,6 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quinn" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" -dependencies = [ - "bytes", - "cfg_aliases", - "pin-project-lite", - "quinn-proto", - "quinn-udp", - "rustc-hash", - "rustls", - "socket2", - "thiserror 2.0.18", - "tokio", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-proto" -version = "0.11.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" -dependencies = [ - "bytes", - "getrandom 0.3.4", - "lru-slab", - "rand 0.9.2", - "ring", - "rustc-hash", - "rustls", - "rustls-pki-types", - "slab", - "thiserror 2.0.18", - "tinyvec", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-udp" -version = "0.5.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" -dependencies = [ - "cfg_aliases", - "libc", - "once_cell", - "socket2", - "tracing", - "windows-sys 0.60.2", -] - [[package]] name = "quote" version = "1.0.44" @@ -2358,13 +2154,9 @@ dependencies = [ "js-sys", "log", "mime", - "mime_guess", "native-tls", "percent-encoding", "pin-project-lite", - "quinn", - "rustls", - "rustls-native-certs", 
"rustls-pki-types", "serde", "serde_json", @@ -2372,34 +2164,15 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", - "tokio-rustls", - "tokio-util", "tower 0.5.3", "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", "web-sys", ] -[[package]] -name = "reqwest-eventsource" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde" -dependencies = [ - "eventsource-stream", - "futures-core", - "futures-timer", - "mime", - "nom", - "pin-project-lite", - "reqwest", - "thiserror 1.0.69", -] - [[package]] name = "ring" version = "0.17.14" @@ -2414,12 +2187,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustix" version = "0.38.44" @@ -2453,32 +2220,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "once_cell", - "ring", "rustls-pki-types", "rustls-webpki", "subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" -dependencies = [ - "openssl-probe 0.2.1", - "rustls-pki-types", - "schannel", - "security-framework 3.5.1", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ - "web-time", "zeroize", ] @@ -2571,16 +2324,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "secrecy" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" -dependencies = [ - "serde", - "zeroize", -] - [[package]] name = "security-framework" version = "2.11.1" @@ -2588,20 +2331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.10.0", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework" -version = "3.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" -dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.10.1", + "core-foundation", "core-foundation-sys", "libc", "security-framework-sys", @@ -2721,6 +2451,17 @@ dependencies = [ "time", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2873,7 +2614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ "bitflags 2.10.0", - "core-foundation 0.9.4", + "core-foundation", "system-configuration-sys", ] @@ -2900,33 +2641,13 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" 
version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.18", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -2967,6 +2688,7 @@ dependencies = [ "thrum-runner", "tokio", "tokio-stream", + "tokio-tungstenite 0.26.2", "tokio-util", "toml", "tower 0.5.3", @@ -3011,7 +2733,7 @@ dependencies = [ "proptest", "serde", "serde_json", - "thiserror 2.0.18", + "thiserror", "toml", "tracing", "tracing-opentelemetry", @@ -3030,8 +2752,9 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "thrum-core", + "toml", "tracing", ] @@ -3040,20 +2763,19 @@ name = "thrum-runner" version = "0.1.0" dependencies = [ "anyhow", - "async-openai", "async-trait", "bollard", "chrono", "futures-util", "git2", + "libc", "notify", "notify-debouncer-mini", "redb", - "reqwest", "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "thrum-core", "thrum-db", "tokio", @@ -3113,21 +2835,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "tinyvec" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.49.0" @@ -3188,6 +2895,30 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "tokio-tungstenite" +version = "0.26.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9daff607c6d2bf6c16fd681ccb7eecc83e4e2cdc1ca067ffaadfca5de7f084" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.26.2", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.28.0", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -3436,16 +3167,50 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] -name = "unarray" -version = "0.1.4" +name = "tungstenite" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" +checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.9.2", + "sha1", + "thiserror", + "utf-8", +] + +[[package]] +name = "tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.9.2", + "sha1", + "thiserror", + "utf-8", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] -name = "unicase" -version = "2.9.0" +name = "unarray" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" +checksum = 
"eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" @@ -3500,6 +3265,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -3524,6 +3295,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wait-timeout" version = "0.2.1" @@ -3626,19 +3403,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "web-sys" version = "0.3.85" diff --git a/Cargo.toml b/Cargo.toml index bb8b290..546b6eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,12 +30,8 @@ toml = "0.8" # CLI clap = { version = "4.5", features = ["derive"] } -# TUI -ratatui = "0.29" -crossterm = "0.28" - # HTTP server -axum = "0.8" +axum = { version = "0.8", features = ["ws"] } tower = "0.5" tower-http = { version = "0.6", features = ["cors", "trace"] } @@ -58,6 +54,9 @@ tracing-opentelemetry = "0.30" bollard = "0.18" futures-util = "0.3" +# System / OS +libc = "0.2" + # File watching notify = "8" notify-debouncer-mini = "0.7" @@ -65,9 +64,6 @@ notify-debouncer-mini = "0.7" # Git git2 = "0.19" -# TOML parsing for consistency checks -cargo_toml = "0.20" - # Testing proptest = "1" loom = "0.7" diff --git 
a/PLAN-THIN-THRUM.md b/PLAN-THIN-THRUM.md new file mode 100644 index 0000000..0bf1f1d --- /dev/null +++ b/PLAN-THIN-THRUM.md @@ -0,0 +1,451 @@ +# Plan: Thin Thrum — Pipeline Controller over Claude Code + +## Executive Summary + +Thrum is currently **40,340 lines of Rust** across 4 crates. Most of that code +reimplements things Claude Code 2.1.71 now does natively. The plan is to strip +Thrum down to a **thin pipeline controller** (~3-5K lines) that: + +1. Manages a **durable task queue** with gated state machine +2. Spawns Claude Code sessions with **full real-time visibility** +3. Runs **deterministic gate checks** (cargo test, clippy, Z3, cross-repo) +4. Provides a **dashboard** showing what every agent is doing right now +5. Handles **human approval** checkpoints + +Everything else — worktree management, sandbox, agent prompts, session +continuation, retry with memory, file operations — is Claude Code's job. + +--- + +## The Visibility Problem (Solved) + +**Current**: Thrum spawns `claude -p "prompt" --output-format json` and waits +for it to finish. You see nothing until the agent is done (or times out). + +**New**: Use `claude -p "prompt" --output-format stream-json --include-partial-messages` +which emits **real-time NDJSON** with every event: + +```jsonl +{"type":"system","subtype":"init","session_id":"...","tools":["Bash","Read","Edit",...]} +{"type":"stream_event",...} // partial token chunks +{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Edit","input":{...}}]}} +{"type":"assistant","message":{"content":[{"type":"text","text":"I'll fix the bug in..."}]}} +{"type":"result","total_cost_usd":0.0653,"num_turns":3,"duration_ms":45000} +``` + +Each `tool_use` event shows the tool name and input in real time. Each `text` +event shows what the agent is thinking/saying. The dashboard can render this +as live tool-call cards — the exact feature TASK-0053 was trying to build +manually, but now it's free from Claude Code itself. 
+ +**Alternative for full interactive visibility**: `claude remote-control --name "TASK-0051"` +starts a session visible at claude.ai/code. Thrum could open one per agent and +you'd watch them live in your browser. + +--- + +## Architecture: What Stays, What Goes + +### KEEP (Thrum's unique value) + +| Component | Lines | Why | +|-----------|-------|-----| +| **Task state machine** | ~300 | `Pending→Implementing→Gate1→Reviewing→Gate2→AwaitingApproval→Approved→Integrating→Merged` — durable, survives restarts | +| **Task queue + DB** | ~700 | redb persistence, claim/dispatch, retry count, dependency tracking | +| **Gate runner** | ~400 | Run `cargo test`, `cargo clippy`, `cargo fmt`, Z3/Rocq, mutants — deterministic checks, no AI needed | +| **Integration gate** | ~200 | Cross-repo pipeline (meld→loom→synth), merge-to-main | +| **Budget tracker** | ~150 | Track spend per task from Claude Code's `total_cost_usd` in result events | +| **Dashboard + API** | ~1500 | Task list, approval UI, live agent activity from stream events, SSE push | +| **Pipeline orchestrator** | ~800 | Dispatch loop, claim priority, semaphore, sequential merge queue | +| **Traceability** | ~200 | Link tasks→branches→commits→gate results for audit | +| **Total** | **~4,250** | | + +### REMOVE (Claude Code does it better) + +| Component | Current Lines | Replacement | +|-----------|--------------|-------------| +| `subprocess.rs` | 550 | `claude -p --output-format stream-json` — parse NDJSON | +| `claude.rs` | 200 | Direct `claude` invocation with `--worktree`, `--resume`, `--max-budget-usd` | +| `worktree.rs` | 200 | `claude --worktree` creates + cleans up worktrees automatically | +| `sandbox.rs` | 781 | `claude` has built-in seatbelt sandbox | +| `shutdown.rs` | 695 | PID tracking unnecessary — `claude` manages its own processes | +| `sync.rs` | 687 | `git fetch/pull` can be simple bash calls, not a module | +| `coordination_hub.rs` | ~300 | File-lock awareness → Claude Code's hooks system | +|
`watcher.rs` | ~200 | File system watching → unnecessary | +| `anthropic.rs` | ~300 | Direct API backend → use Claude Code as the only backend | +| `openai_compat.rs` | ~200 | OpenAI compat backend → remove | +| `cli_agent.rs` | ~150 | Generic CLI agent → remove | +| `backend.rs` | ~200 | Backend trait abstraction → single concrete implementation | +| `session_export.rs` (both) | ~200 | Claude Code has `--resume` and session persistence | +| `ci.rs` (runner) | 1209 | CI integration → out of scope for thin version | +| `a2a.rs` (both) | ~1540 | Agent-to-Agent protocol → premature, remove | +| `consistency.rs` | ~400 | Cross-repo checks → simplify to gate | +| `convergence.rs` | ~300 | Failure pattern detection → Claude Code's memory handles this | +| `harness.rs` | ~1100 | Self-improving harness → future work, not core | +| `safety.rs` | ~300 | TCL/ASIL classification → documentation, not runtime | +| `sphinx_needs.rs` | ~200 | Requirements tracing → remove | +| `trust.rs` | ~300 | Trust boundaries → keep as config, remove runtime | +| `verification.rs` | ~989 | Tag-based verification → simplify | +| `watch.rs` (cli) | 1392 | TUI dashboard → web dashboard is better | +| **Total removed** | **~11,000+** | | + +### SIMPLIFY + +| Component | From | To | +|-----------|------|-----| +| `event.rs` | 1251 lines, 30+ event kinds | ~100 lines, 5 events: TaskClaimed, GatePassed, GateFailed, AgentStream, TaskMerged | +| `gate.rs` | 1415 lines | ~400 lines — just run commands, collect pass/fail | +| `parallel.rs` | 3935 lines (!) 
| ~800 lines — dispatch loop, invoke claude, parse stream, run gates | +| `dashboard.rs` | 2620 lines | ~800 lines — task list, approval buttons, live stream viewer | +| `lib.rs` (api) | 2961 lines | ~500 lines — REST endpoints + SSE | +| `main.rs` (cli) | 2533 lines | ~400 lines — run, task add/list/approve/reject, status | +| `role.rs` | ~200 lines | Remove — Claude Code's `--agent` flag replaces role system | +| `agent.rs` | ~200 lines | Remove — agent prompts go in `.claude/agents/*.md` | +| `checkpoint.rs` | ~300 lines | Remove — Claude Code's session persistence replaces | + +--- + +## New Claude Code Integration Layer + +Replace the entire `backend` trait + `subprocess` + `claude` + `worktree` + +`sandbox` stack with a single module: + +```rust +/// Spawn a Claude Code session and stream its output. +/// +/// Returns a stream of AgentEvents parsed from NDJSON. +pub async fn spawn_agent( + task: &Task, + repo: &RepoConfig, + prompt: &str, + budget_usd: f64, +) -> Result<(JoinHandle<Result<AgentResult>>, mpsc::Receiver<AgentEvent>)> { + let mut cmd = Command::new("claude"); + cmd.arg("-p").arg(prompt) + .arg("--output-format").arg("stream-json") + .arg("--include-partial-messages") + .arg("--worktree") // Claude creates + cleans worktree + .arg("--permission-mode").arg("auto") // or bypassPermissions in sandbox + .arg("--max-budget-usd").arg(budget_usd.to_string()) + .arg("--model").arg("claude-opus-4-6") + .current_dir(&repo.path) + .env_remove("CLAUDECODE") + .env_remove("CLAUDE_CODE_ENTRYPOINT") + .stdout(Stdio::piped()); + + // Optional: resume previous session on retry + if let Some(session_id) = &task.session_id { + cmd.arg("--resume").arg(session_id); + } + + // Optional: use custom agent definition + if let Some(agent) = &task.agent { + cmd.arg("--agent").arg(agent); + } + + let child = cmd.spawn()?; + let stdout = BufReader::new(child.stdout.take().unwrap()); + + // Parse NDJSON stream into typed events + let (tx, rx) = mpsc::channel(256); + let handle = tokio::spawn(async move { +
let mut lines = stdout.lines(); + while let Some(line) = lines.next_line().await? { + if let Ok(event) = serde_json::from_str::<StreamEvent>(&line) { + match &event { + StreamEvent::Init { session_id, .. } => { + // Store session_id for resume on retry + tx.send(AgentEvent::SessionStarted(session_id)).await; + } + StreamEvent::Assistant { message } => { + for content in &message.content { + match content { + Content::ToolUse { name, input } => { + tx.send(AgentEvent::ToolCall { + tool: name.clone(), + input: input.clone(), + }).await; + } + Content::Text { text } => { + tx.send(AgentEvent::Text(text.clone())).await; + } + } + } + } + StreamEvent::Result { total_cost_usd, result, .. } => { + return Ok(AgentResult { + output: result.clone(), + cost_usd: *total_cost_usd, + session_id: event.session_id().cloned(), + }); + } + } + } + } + }); + + Ok((handle, rx)) +} +``` + +This single function replaces: +- `subprocess.rs` (550 lines) +- `claude.rs` (200 lines) +- `worktree.rs` (200 lines) +- `sandbox.rs` (781 lines) +- `shutdown.rs` (695 lines) +- `backend.rs` (200 lines) +- Half of `parallel.rs` (~2000 lines) + +**~4,600 lines → ~150 lines.** + +--- + +## New Pipeline (simplified parallel.rs) + +``` +loop { + // 1. 
Claim next task (priority: RetryableFailed > Approved > Pending) + let task = claim_next(&db)?; + + match task.status { + Pending | RetryableFailed => { + // IMPLEMENT: spawn Claude Code agent + let (handle, events) = spawn_agent(&task, &repo, &prompt, budget).await?; + + // Forward events to dashboard via SSE + while let Some(event) = events.recv().await { + event_bus.emit(AgentEvent(task.id, event)); + } + + let result = handle.await?; + if !result.has_changes() { + task.status = Gate1Failed("no changes"); + continue; + } + + // GATE 1: cargo test + clippy + fmt (deterministic, no AI) + let gate1 = run_gate_checks(&repo)?; + if !gate1.passed { + task.status = Gate1Failed(gate1); + continue; + } + + // REVIEW: spawn Claude Code with reviewer agent + let review = spawn_agent(&task, &repo, &review_prompt, 1.0).await?; + + // GATE 2: proof checks if configured + let gate2 = run_proof_checks(&repo)?; + + // → AwaitingApproval (human reviews in dashboard) + task.status = AwaitingApproval { gate1, review, gate2 }; + } + + Approved => { + // INTEGRATE: merge to main (no AI needed, pure git) + merge_to_main(&repo, &task.branch())?; + run_integration_gate(&repos)?; // cross-repo if configured + task.status = Merged; + } + } +} +``` + +--- + +## Dashboard: Real-Time Agent Visibility + +The new dashboard gets **live tool-call streams** for free: + +``` +┌─────────────────────────────────────────────────────┐ +│ TASK-0051: Add chat injection │ +│ Status: Implementing (3m 22s) Cost: $0.42 │ +│ │ +│ ┌─ Agent Activity ────────────────────────────────┐ │ +│ │ 🔧 Read crates/thrum-api/src/dashboard.rs │ │ +│ │ 🔧 Grep "SSE" --type rust │ │ +│ │ 💬 "I'll add a POST endpoint that sends..." │ │ +│ │ 🔧 Edit crates/thrum-api/src/lib.rs [lines 45] │ │ +│ │ 🔧 Bash cargo test --package thrum-api │ │ +│ │ ✅ Test passed │ │ +│ │ 🔧 Edit crates/thrum-api/src/dashboard.rs │ │ +│ │ ... 
▼ live │ │ +│ └─────────────────────────────────────────────────┘ │ +├─────────────────────────────────────────────────────┤ +│ TASK-0053: Streaming tool cards Pending │ +│ TASK-0054: Inline config editing Pending │ +└─────────────────────────────────────────────────────┘ +``` + +Each `AgentEvent::ToolCall` from the stream-json output renders as a card. +Each `AgentEvent::Text` renders as agent commentary. No custom parsing of +Claude's internal format needed — the stream-json protocol gives us structured +events. + +**Alternative**: For even richer visibility, use `claude remote-control` +per-agent and embed the claude.ai/code URLs in the dashboard as iframes +or links. You'd see the full Claude Code UI per agent. + +--- + +## Migration Path + +### Phase 1: Stream visibility (immediate value, ~2 days) + +1. Change `claude.rs` to use `--output-format stream-json --include-partial-messages` +2. Parse NDJSON stream, extract tool_use/text/result events +3. Forward to event bus → SSE → dashboard +4. Dashboard renders live tool-call cards per agent +5. **Result**: You can see what every agent is doing in real-time + +### Phase 2: Delegate worktree + sandbox (~1 day) + +1. Add `--worktree` flag to claude invocation +2. Remove `worktree.rs`, `sandbox.rs` +3. Remove seatbelt profile generation +4. Claude Code manages worktree lifecycle + +### Phase 3: Simplify pipeline (~3 days) + +1. Remove `backend.rs` trait, `anthropic.rs`, `openai_compat.rs`, `cli_agent.rs` +2. Inline claude invocation directly in pipeline +3. Remove `shutdown.rs` PID tracking (Claude manages its own) +4. Remove agent prompt loading (use `--agent` flag or `.claude/agents/*.md`) +5. Add `--max-budget-usd` per task instead of manual budget tracking +6. Use Claude Code's `session_id` from init event for `--resume` on retries + +### Phase 4: Cut dead weight (~2 days) + +1. Remove `a2a.rs` (both crates), `ci.rs`, `sphinx_needs.rs` +2. Remove `harness.rs`, `convergence.rs`, `safety.rs` +3. 
Remove `consistency.rs` (fold into gate if needed) +4. Remove `coordination_hub.rs`, `watcher.rs` +5. Simplify `event.rs` to 5 core events +6. Remove `watch.rs` TUI (web dashboard is primary) + +### Phase 5: Leverage Claude Code plugins (~1 day) + +1. Create a `.claude/agents/implementer.md` with Thrum's agent prompt +2. Create a `.claude/agents/reviewer.md` with Thrum's reviewer prompt +3. Use `--agent implementer` and `--agent reviewer` instead of + loading prompt files and `--system-prompt` +4. Consider creating a Thrum plugin for Claude Code's plugin system + +--- + +## Ultra-Thin Variant: Agent Teams + +Claude Code's agent teams (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`) combine +tmux + worktree isolation to run multiple agents concurrently. Each teammate +gets its own tmux pane, its own git worktree, and coordination via hooks +(`TeammateIdle`, `TaskCompleted`). + +This means Thrum's parallel dispatch engine (`parallel.rs` at ~3,935 lines) +can be replaced by a **task feeder** that: +1. Pops tasks from the durable queue +2. Invokes `claude -p --output-format stream-json --worktree` per task +3. Parses the stream for visibility +4. Runs deterministic gates after agent completion +5. Feeds approval results back + +Both Thrum and agent teams are experimental — no reason to avoid the leaner +path. If agent teams stabilize (expected soon), Thrum can optionally delegate +concurrency entirely. Until then, Thrum manages its own tokio-spawned agents +using the same `claude -p` invocation. 
+ +### Line Count Projection (Ultra-Thin) + +| Component | Original | Current | Thin | Ultra-Thin | +|-----------|----------|---------|------|------------| +| thrum-core | 13,977 | 13,859 | ~1,500 | ~1,200 | +| thrum-db | 4,130 | 2,647 | ~1,200 | ~800 | +| thrum-runner | 11,321 | 10,301 | ~1,200 | ~600 | +| thrum-api | 6,904 | 6,848 | ~1,800 | ~1,200 | +| thrum-cli | 4,008 | 3,879 | ~400 | ~300 | +| **Total** | **40,340** | **39,202** | **~6,100** | **~4,100** | + +--- + +## Execution Progress + +The migration happens by **rewriting the core**, then **deleting** what becomes +unreferenced. Not the reverse — removing modules first would break compilation. + +### Step 1: ✅ New `claude_code.rs` integration (DONE) + +Created `crates/thrum-runner/src/claude_code.rs` (~390 lines) with: +- `AgentConfig`, `AgentEvent`, `AgentResult`, `AgentHandle`, `AiResponse` types +- `spawn_agent()`: invokes `claude -p` with `--output-format stream-json`, + parses NDJSON, streams typed events via mpsc channel +- `invoke_streaming()`: bridge that returns `AiResponse` for pipeline compat +- `load_agent_prompt()`: reads agent .md files, replaces `{{CLAUDE_MD}}` +- `health_check()`: runs `claude --version` + +### Step 2: ✅ Delete old backend stack (DONE) + +Deleted 5 modules (1,578 lines): +- `backend.rs` (618 LOC) — AiBackend trait, BackendRegistry, AiRequest +- `claude.rs` (385 LOC) — Claude CLI wrapper via subprocess +- `anthropic.rs` (193 LOC) — Anthropic Messages API +- `openai_compat.rs` (191 LOC) — OpenAI/Mistral/custom providers +- `cli_agent.rs` (191 LOC) — generic CLI agent wrapper + +Also migrated `ci.rs::dispatch_ci_fixer()` and `main.rs::invoke_planner()` +to use `claude_code::invoke_streaming()`. Removed unused `async-openai` and +`reqwest` dependencies. 
+ +**Current LOC**: 39,202 (down from 40,340 — net -1,138) + +### Step 3: Simplify deeply-integrated modules (IN PROGRESS) + +Analysis revealed most thrum-core modules are deeply woven into the pipeline: +- `spec.rs`, `verification.rs`, `convergence.rs`, `harness.rs` → used by + gate execution and task state +- `trust.rs`, `traceability.rs`, `memory.rs` → used by task, API, dashboard +- `telemetry.rs`, `a2a.rs` → used by CLI, API +- `subsample.rs` → used by gate.rs + +Only `consistency.rs` and `safety.rs` are truly isolated (CLI-only). Mass +deletion would require cascading changes across all crates. + +**Next actions**: +1. Simplify `parallel.rs` — remove sandbox profile creation, observer mode, + and watcher setup that are no longer needed in the thin architecture +2. Remove `consistency.rs` and `safety.rs` with their CLI commands +3. Simplify `shutdown.rs` — the old ProcessTracker was designed for managing + ClaudeCliBackend subprocesses; now it just needs startup recovery +4. Make `a2a.rs`, `sphinx_needs.rs`, `harness.rs` optional (feature-gated) + +### Step 4: Simplify what remains (TODO) + +- `event.rs`: 1,251 → ~100 lines (5 events) +- `gate.rs`: 1,415 → ~400 lines (just run commands) +- `dashboard.rs`: 2,620 → ~800 lines (task list, approval, live stream) +- `main.rs`: 2,533 → ~300 lines (run, task, status) + +--- + +## What You Gain + +1. **Visibility**: See every tool call, every file edit, every bash command + in real-time on the dashboard +2. **Reliability**: Claude Code's worktree/sandbox/session management is + battle-tested across millions of users. Our custom implementations had + bugs (empty-branch false positive, stuck integrating, merge conflicts) +3. **Free upgrades**: Every Claude Code release improves your agents — + memory, plugins, better prompting, performance +4. **Less maintenance**: 90% less code = 90% fewer bugs to fix +5. **Plugin ecosystem**: Use `/code-review`, `/feature-dev`, `security-guidance` + hooks — all free +6. 
**Budget accuracy**: `total_cost_usd` from stream-json is the real cost, + not our estimates + +## What You Lose + +1. **Backend flexibility**: No more swapping to OpenAI/Anthropic API directly. + Claude Code is the only backend. (Acceptable — it's better anyway) +2. **Fine-grained process control**: Claude Code manages its own processes. + Less control over timeout behavior. (Mitigated by `--max-budget-usd`) +3. **A2A protocol**: The agent-to-agent protocol gets removed. (Premature + anyway — revisit when Claude Code's agent teams mature) diff --git a/agents/ci_fixer.md b/agents/ci_fixer.md new file mode 100644 index 0000000..ec9526d --- /dev/null +++ b/agents/ci_fixer.md @@ -0,0 +1,40 @@ +# CI Fix Agent + +You are a CI Fix Agent for the Thrum autonomous development pipeline. +Your sole job is to fix CI failures on a pull request branch. + +## Context + +{{CLAUDE_MD}} + +## Process + +1. **Read the CI failure logs** provided in the prompt carefully +2. **Identify the root cause** — build error, test failure, lint issue, type error, etc. +3. **Make the minimum necessary fix** — only change what's needed to make CI pass +4. **Run relevant checks locally** to verify your fix before committing: + - `cargo fmt --check` for formatting issues + - `cargo clippy` for lint issues + - `cargo test` for test failures + - `cargo build` for build errors +5. 
**Commit the fix** with a clear message like `fix: resolve CI failure in <component>` + +## Rules + +- Make **MINIMAL** changes — only fix the CI failure +- Do **NOT** refactor, add features, or restructure code +- Do **NOT** modify CI configuration unless the config itself is the bug +- Do **NOT** change test expectations unless the test is genuinely wrong +- If the fix requires understanding broader context, read the relevant source files first +- Commit your fix before exiting — uncommitted changes will be lost + +## Common CI Failures + +- **cargo fmt**: Run `cargo fmt` to auto-fix formatting +- **cargo clippy**: Read the clippy suggestion and apply the recommended fix +- **cargo test**: Read the test failure, understand the assertion, fix the code or test +- **cargo build**: Read the compiler error, fix the type/lifetime/borrow issue + +## Output + +After fixing, briefly summarize what you changed and why. diff --git a/agents/implementer_thrum.md b/agents/implementer_thrum.md new file mode 100644 index 0000000..54c672f --- /dev/null +++ b/agents/implementer_thrum.md @@ -0,0 +1,41 @@ +# Thrum Implementer + +You are the Implementation Agent for the **thrum** orchestration engine. +You implement tasks by writing code and tests following thrum's conventions exactly. + +## Target Repo Conventions + +The following is the complete CLAUDE.md for the thrum repository. Follow +every instruction precisely. + +{{CLAUDE_MD}} + +## Implementation Workflow + +1. Read the task description and acceptance criteria carefully +2. Understand the existing crate structure before making changes: + - `thrum-core`: Domain types (Task, Gate, Repo, Budget) + - `thrum-db`: Persistence via redb + - `thrum-runner`: Subprocess management, parallel engine, sandbox + - `thrum-api`: HTTP API and web dashboard + - `thrum-cli`: CLI binary +3. Write the implementation in the appropriate crate +4. Write tests for new functionality +5. Run `cargo fmt` to format code +6. 
Run `cargo clippy --workspace --tests -- -D warnings` and fix warnings +7. Run `cargo test --workspace` to verify all tests pass +8. **Commit your work**: `git add -A && git commit -m "descriptive message"` + - You MUST commit before finishing. Uncommitted work is lost. + - A pre-commit hook will run cargo fmt and clippy. If it fails, fix the issues and try again. + - Do NOT use `--no-verify` — the hook exists to catch problems early. + +## Working Directory + +Your current working directory IS the repo root. All source files are here. +Do NOT navigate to any other directory or use absolute paths from CLAUDE.md +or config files. Stay in your current working directory for ALL operations. + +## Branch Convention + +You are working on a branch created by thrum. Make commits with +clear messages describing what changed and why. diff --git a/agents/planner.md b/agents/planner.md index bcc6de6..409c6b8 100644 --- a/agents/planner.md +++ b/agents/planner.md @@ -21,9 +21,36 @@ produce a prioritized queue of implementation tasks. - **Title**: Clear, imperative description - **Repo**: Which repo this targets - **Description**: What needs to change and why - - **Acceptance criteria**: Specific, testable conditions + - **Acceptance criteria**: Specific, testable conditions with verification tags - **Requirement ID**: If traceable to a formal requirement +## Verification-Tagged Acceptance Criteria + +Every acceptance criterion MUST have a verification tag specifying HOW it will be +verified. If it matters, there must be a concrete, automated verification mechanism. +"Hope someone reads the code" is not acceptable. + +Valid tags: +- **(TEST)** — Verified by automated tests (unit, integration, property-based) +- **(LINT)** — Verified by linting / static analysis (clippy, eslint, etc.) 
+- **(BENCH)** — Verified by benchmarks / performance tests +- **(MANUAL)** — Requires manual human verification +- **(BROWSER)** — Verified by browser / UI testing +- **(SECURITY)** — Verified by security audit / scanning + +Each criterion must be: +1. **Concrete** — not vague ("make it better" is rejected) +2. **Measurable** — clear pass/fail condition +3. **Tagged** — ends with a verification tag in parentheses + +Examples: +- "All unit tests pass including new coverage (TEST)" +- "No clippy warnings on the changed crate (LINT)" +- "P99 latency below 50ms on /api/tasks (BENCH)" +- "Dashboard shows per-criterion verification status (BROWSER)" +- "No known CVEs in dependency tree (SECURITY)" +- "Architecture documentation reviewed by maintainer (MANUAL)" + ## Priority Rules 1. P0: Cross-repo consistency (version drift, unpinned deps) 2. P0: Blocking integration (e.g., shared type definitions) @@ -32,19 +59,45 @@ produce a prioritized queue of implementation tasks. 5. P3: Quality improvements, documentation ## Output Format -Produce a JSON array of task objects: +Produce a JSON array of task objects. Every acceptance criterion must include +a verification tag: ```json [ { "repo": "loom", "title": "Add i32.popcnt to ISLE pipeline", "description": "...", - "acceptance_criteria": ["..."], + "acceptance_criteria": [ + "cargo test passes with new popcnt tests (TEST)", + "No clippy warnings (LINT)", + "Z3 translation validation proof added (TEST)" + ], "requirement_id": "REQ-LOOM-042" } ] ``` +## Trust Boundaries & Risk Assessment + +Repositories may have trust boundary configurations in `[repo.trust]` that classify +files by risk level. When planning tasks, consider the trust implications: + +- **high_risk** files (e.g. `src/crypto/**`, `Cargo.lock`): Changes CANNOT be + auto-approved and must go through manual human review. Plan extra time. +- **security_sensitive** files (e.g. 
`Cargo.toml`, `build.rs`, `.github/**`): + Changes trigger extra security checks (cargo-audit, cargo-deny). May add latency. +- **auto_ok** files (e.g. `docs/**`, `*.md`): Safe for fast-path approval. + +When producing tasks, include a `risk_assessment` field if trust boundaries apply: +```json +{ + "repo": "loom", + "title": "Update cryptographic key derivation", + "risk_assessment": "HIGH — touches src/crypto/** (trust:high_risk)", + "description": "..." +} +``` + ## Cross-Repo Awareness - Changes to shared types (Instruction/WasmOp enums) need coordinated tasks - wasmparser upgrades must be synced across all repos diff --git a/configs/pipeline.toml b/configs/pipeline.toml index 82b129f..b3b3292 100644 --- a/configs/pipeline.toml +++ b/configs/pipeline.toml @@ -9,7 +9,7 @@ # agents can work concurrently on the same repo without index conflicts. [engine] -per_repo_limit = 3 +per_repo_limit = 4 worktrees_dir = "worktrees" max_retries = 10 # Reset via dashboard retry button to give a task another round @@ -72,7 +72,7 @@ checksums = "sha256" # Overall spending ceiling and per-session timeout for AI agents. [budget] -ceiling_usd = 1000.0 +ceiling_usd = 3000.0 per_session_timeout_secs = 600 [budget.allocation] @@ -98,7 +98,7 @@ type = "agent" command = "claude" prompt_args = ["-p", "{prompt}", "--output-format", "json"] model = "claude-opus-4-6" -timeout_secs = 1200 +timeout_secs = 2400 enabled = true # Uncomment to add OpenCode as an alternative agent: @@ -149,34 +149,56 @@ enabled = true # Map pipeline stages to AI backends and prompt templates. # Backend values reference a registered backend by name or model substring. # e.g., "opus" resolves to any backend whose model contains "opus". +# +# timeout_recovery: what to do when an agent invocation times out: +# "retry" — Resume from checkpoint (session continuation). Best for implementers. +# "skip" — Skip the timed-out step. For reviewers: auto-approve with note. +# "extend" — Double the timeout and retry once. 
Falls back to fail. +# "fail" — Treat timeout as failure (default). [roles.implementer] backend = "opus" prompt_template = "agents/implementer.md" budget_usd = 6.0 -timeout_secs = 1200 +timeout_secs = 2400 +timeout_recovery = "retry" # Resume from checkpoint on timeout [roles.reviewer] backend = "sonnet" prompt_template = "agents/reviewer.md" budget_usd = 1.0 timeout_secs = 300 +timeout_recovery = "skip" # Auto-approve with "review-skipped-timeout" note [roles.planner] backend = "opus" prompt_template = "agents/planner.md" budget_usd = 1.0 timeout_secs = 300 +timeout_recovery = "fail" # Planning timeout = real failure + +[roles.ci_fixer] +backend = "opus" +prompt_template = "agents/ci_fixer.md" +budget_usd = 3.0 +timeout_secs = 600 +timeout_recovery = "retry" # Resume CI fix from checkpoint # ── Sandbox ─────────────────────────────────────────────────────────── # Resource limits for agent subprocess execution. -# backend: "none" (no isolation), "docker", "nsjail", etc. +# backend: +# "none" — no isolation (passthrough) +# "os-native" — enforce seatbelt (macOS) / bubblewrap (Linux) +# "observe" — run without enforcement, audit writes after execution +# and log which operations WOULD be denied. Useful for +# debugging sandbox profiles before enabling enforcement. +# "docker" — Docker container isolation [sandbox] -backend = "none" +backend = "os-native" memory_limit_mb = 4096 cpu_limit = 2.0 -network = false +network = true # ── Subsampling ─────────────────────────────────────────────────────── # Run a fraction of gate checks to speed up iteration. 
diff --git a/crates/thrum-api/Cargo.toml b/crates/thrum-api/Cargo.toml index af401fb..6ba6a76 100644 --- a/crates/thrum-api/Cargo.toml +++ b/crates/thrum-api/Cargo.toml @@ -27,3 +27,4 @@ futures-util = { workspace = true } tempfile = "3" reqwest = { workspace = true } tokio = { workspace = true } +tokio-tungstenite = "0.26" diff --git a/crates/thrum-api/assets/dashboard.html b/crates/thrum-api/assets/dashboard.html index 9c1ed60..bd52764 100644 --- a/crates/thrum-api/assets/dashboard.html +++ b/crates/thrum-api/assets/dashboard.html @@ -3,8 +3,10 @@ + Thrum Dashboard + @@ -18,33 +20,68 @@

thrum

dashboard + ? - +
- +
- + +
+ Pipeline Legend — hover timeline steps for details +
+
+ P + + I + + G1 + + R + + G2 + + A + + Int + + CI + + M +
+
+
P Not reached
+
P Completed
+
I Active
+
G1 Failed
+ Full pipeline docs → +
+
+
+ +
-

Task Queue

+

Task Queue

+

Tasks progressing through the pipeline — click a row for details

@@ -52,29 +89,54 @@

Task Queue

-

Agent Activity

+

Agent Activity

+

Live AI agent sessions — cards update in real-time as agents implement, review, and gate-check tasks

Waiting for agent events…
- + +
+

Remote Sync

+

Fetch upstream changes and rebase active task branches onto updated main

+
+ + +
+
+
+ +
-

Memory

+

Memory

+

Persistent context for agents — error patterns, decisions, and hints that carry across retries

- +
-

Activity Log

+

Traceability

+
+
+
+ + +
+

Pipeline Events

+

Gate results, state transitions, and errors — filtered to meaningful pipeline activity

@@ -98,38 +160,219 @@

Reject Task