replicate · markphelps · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
@@ -214,12 +214,15 @@ Each `SchemaType` produces its JSON Schema fragment via `JSONSchema()`:
 
 ## Cog-Specific Extensions
 
-| Extension             | Purpose                                           |
-| --------------------- | ------------------------------------------------- |
-| `x-order`             | Preserves parameter order from function signature |
-| `x-cog-array-type`    | Marks iterators vs regular arrays                 |
-| `x-cog-array-display` | Hints for how to display streaming output         |
-| `x-cog-secret`        | Marks sensitive inputs                            |
+| Extension             | Purpose                                             |
+| --------------------- | --------------------------------------------------- |
+| `x-order`             | Preserves parameter order from function signature   |
+| `x-cog-array-type`    | Marks iterators vs regular arrays                   |
+| `x-cog-array-display` | Hints for how to display streaming output           |
+| `x-cog-secret`        | Marks sensitive inputs                              |
+| `x-cog-streaming`     | Marks prediction operations that accept SSE clients |
+
+Iterator output types describe the shape of accumulated JSON output. SSE response support is a separate prediction operation capability and is only advertised when the prediction handler opts in with `@cog.streaming`.
 
 ## Where the Schema Lives
 

@@ -7,7 +7,7 @@
 //! 4. Run event loop routing responses to predictions
 //! 5. On worker crash: fail all predictions, shut down
 
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::process::Stdio;
 use std::sync::Arc;
 use std::sync::Mutex as StdMutex;
@@ -29,6 +29,8 @@ use crate::bridge::transport::create_transport;
 use crate::permit::{InactiveSlotIdleToken, PermitPool, SlotIdleToken};
 use crate::prediction::Prediction;
 
+const MAX_PENDING_CANCELLATIONS: usize = 1000;
+
 /// Upload a file to a signed endpoint, returning the final URL.
 ///
 /// Matches the behavior of Python cog's `put_file_to_signed_endpoint`:
@@ -353,15 +355,18 @@ pub struct OrchestratorReady {
     pub setup_logs: String,
 }
 
+struct RegisterPredictionMessage {
+    slot_id: SlotId,
+    prediction: Arc<StdMutex<Prediction>>,
+    idle_sender: tokio::sync::oneshot::Sender<SlotIdleToken>,
+    registered_ack: tokio::sync::oneshot::Sender<()>,
+}
+
 pub struct OrchestratorHandle {
     child: Child,
     ctrl_writer:
         Arc<tokio::sync::Mutex<FramedWrite<tokio::process::ChildStdin, JsonCodec<ControlRequest>>>>,
-    register_tx: mpsc::Sender<(
-        SlotId,
-        Arc<StdMutex<Prediction>>,
-        tokio::sync::oneshot::Sender<SlotIdleToken>,
-    )>,
+    register_tx: mpsc::Sender<RegisterPredictionMessage>,
     healthcheck_tx: mpsc::Sender<tokio::sync::oneshot::Sender<HealthcheckResult>>,
     cancel_tx: mpsc::Sender<String>,
     slot_ids: Vec<SlotId>,
@@ -375,10 +380,17 @@ impl Orchestrator for OrchestratorHandle {
         prediction: Arc<StdMutex<Prediction>>,
         idle_sender: tokio::sync::oneshot::Sender<SlotIdleToken>,
     ) {
+        let (ack_tx, ack_rx) = tokio::sync::oneshot::channel();
         let _ = self
             .register_tx
-            .send((slot_id, prediction, idle_sender))
+            .send(RegisterPredictionMessage {
+                slot_id,
+                prediction,
+                idle_sender,
+                registered_ack: ack_tx,
+            })
             .await;
+        let _ = ack_rx.await;
     }
 
     async fn cancel_by_prediction_id(&self, prediction_id: &str) -> Result<(), OrchestratorError> {
@@ -688,6 +700,18 @@ pub async fn spawn_worker(
     })
 }
 
+fn record_pending_cancellation(pending_cancellations: &mut HashSet<String>, prediction_id: String) {
+    if pending_cancellations.len() >= MAX_PENDING_CANCELLATIONS {
+        tracing::warn!(
+            prediction_id = %prediction_id,
+            cap = MAX_PENDING_CANCELLATIONS,
+            "Dropping pending cancellation because the pending cancellation buffer is full"
+        );
+        return;
+    }
+    pending_cancellations.insert(prediction_id);
+}
+
 #[allow(clippy::too_many_arguments)]
 async fn run_event_loop(
     mut ctrl_reader: FramedRead<tokio::process::ChildStdout, JsonCodec<ControlResponse>>,
@@ -698,11 +722,7 @@ async fn run_event_loop(
         SlotId,
         FramedRead<tokio::net::unix::OwnedReadHalf, JsonCodec<SlotResponse>>,
     )>,
-    mut register_rx: mpsc::Receiver<(
-        SlotId,
-        Arc<StdMutex<Prediction>>,
-        tokio::sync::oneshot::Sender<SlotIdleToken>,
-    )>,
+    mut register_rx: mpsc::Receiver<RegisterPredictionMessage>,
     mut healthcheck_rx: mpsc::Receiver<tokio::sync::oneshot::Sender<HealthcheckResult>>,
     mut cancel_rx: mpsc::Receiver<String>,
     pool: Arc<PermitPool>,
@@ -718,6 +738,7 @@ async fn run_event_loop(
     let mut pending_healthchecks: Vec<tokio::sync::oneshot::Sender<HealthcheckResult>> = Vec::new();
     let mut healthcheck_counter: u64 = 0;
     let mut pending_uploads: HashMap<SlotId, Vec<tokio::task::JoinHandle<()>>> = HashMap::new();
+    let mut pending_cancellations: HashSet<String> = HashSet::new();
 
     let (slot_msg_tx, mut slot_msg_rx) =
         mpsc::channel::<(SlotId, Result<SlotResponse, std::io::Error>)>(100);
@@ -923,17 +944,19 @@ async fn run_event_loop(
                         }
                     }
                     None => {
-                        tracing::debug!(%prediction_id, "Cancel requested for unknown prediction (may have already completed)");
+                        tracing::debug!(%prediction_id, "Cancel requested for unknown prediction; storing pending cancellation");
+                        record_pending_cancellation(&mut pending_cancellations, prediction_id);
                     }
                 }
             }
 
-            Some((slot_id, prediction, idle_sender)) = register_rx.recv() => {
+            Some(RegisterPredictionMessage { slot_id, prediction, idle_sender, registered_ack }) = register_rx.recv() => {
                 let prediction_id = match try_lock_prediction(&prediction) {
                     Some(p) => p.id().to_string(),
                     None => {
                         // Mutex poisoned during registration - prediction already failed
                         tracing::error!(%slot_id, "Prediction mutex poisoned during registration");
+                        let _ = registered_ack.send(());
                         continue;
                     }
                 };
@@ -949,6 +972,24 @@ async fn run_event_loop(
                 );
                 tracing::debug!(%slot_id, %prediction_id, "Registered prediction");
                 predictions.insert(slot_id, prediction);
+                let pending_cancel = pending_cancellations.remove(&prediction_id);
+                let _ = registered_ack.send(());
+                if pending_cancel {
+                    tracing::info!(
+                        target: "coglet::prediction",
+                        %prediction_id,
+                        %slot_id,
+                        "Applying pending cancellation"
+                    );
+                    let mut writer = ctrl_writer.lock().await;
+                    if let Err(e) = writer.send(ControlRequest::Cancel { slot: slot_id }).await {
+                        tracing::error!(
+                            %slot_id,
+                            error = %e,
+                            "Failed to send pending cancel request to worker"
+                        );
+                    }
+                }
             }
 
             Some((slot_id, result)) = slot_msg_rx.recv() => {
@@ -966,7 +1007,7 @@ async fn run_event_loop(
                     Ok(SlotResponse::LogLine { source, data }) => {
                         let (prediction_id, poisoned) = if let Some(pred) = predictions.get(&slot_id) {
                             if let Some(mut p) = try_lock_prediction(pred) {
-                                p.append_log(&data);
+                                p.append_log_source(source, &data);
                                 (Some(p.id().to_string()), false)
                             } else {
                                 (None, true)
@@ -1015,10 +1056,10 @@ async fn run_event_loop(
                             predictions.remove(&slot_id);
                         }
                     }
-                    Ok(SlotResponse::OutputChunk { output, index: _ }) => {
+                    Ok(SlotResponse::OutputChunk { output, index }) => {
                         let poisoned = if let Some(pred) = predictions.get(&slot_id) {
                             if let Some(mut p) = try_lock_prediction(pred) {
-                                p.append_output(output);
+                                p.append_output_chunk(output, index);
                                 false
                             } else {
                                 true
@@ -1255,6 +1296,19 @@ mod tests {
         assert_eq!(result.into_values(), Vec::<serde_json::Value>::new());
     }
 
+    #[test]
+    fn record_pending_cancellation_caps_stored_ids() {
+        let mut pending = HashSet::new();
+        for index in 0..MAX_PENDING_CANCELLATIONS {
+            record_pending_cancellation(&mut pending, format!("pred-{index}"));
+        }
+
+        record_pending_cancellation(&mut pending, "overflow".to_string());
+
+        assert_eq!(pending.len(), MAX_PENDING_CANCELLATIONS);
+        assert!(!pending.contains("overflow"));
+    }
+
     #[test]
     fn wrap_outputs_schema_array_single_item() {
         // List[Path] with num_outputs=1 → ["url"] not "url"