diff --git a/docs/proposals/114-mcp-interface-feedback-stress-test.md b/docs/proposals/114-mcp-interface-feedback-stress-test.md index 9fc6d9a9..346b1ac3 100644 --- a/docs/proposals/114-mcp-interface-feedback-stress-test.md +++ b/docs/proposals/114-mcp-interface-feedback-stress-test.md @@ -86,15 +86,19 @@ During the stress test, Plan 1 (20f1cfac) stalled at 5.5% with zero diagnostic i ### I4 — No idempotency guard on `plan_create` +**Status:** Implemented (PR #242). + **Problem:** If the user double-clicks or the agent accidentally submits twice, two identical plans are created. Each `plan_create` call generates a new `plan_id` with no server-side deduplication. -**Proposed fix:** Add an optional `request_id` (client-generated UUID) parameter to `plan_create`. The server checks if a plan with the same `request_id` already exists for this user: -- If yes: return the existing `plan_id` (idempotent response) -- If no: create a new plan +**Original proposal:** Add an optional `request_id` (client-generated UUID) parameter to `plan_create`. The server checks if a plan with the same `request_id` already exists for this user. This is a standard idempotency pattern (Stripe, AWS, etc.). + +**Why `request_id` was not adopted:** The primary MCP consumers are LLM agents. LLMs cannot generate UUIDs natively — they would need an extra tool call just to produce the idempotency key, adding friction to the exact workflow it's meant to protect. A mechanism that requires client cooperation is a poor fit when the clients are language models. + +**Implemented approach:** Automatic server-side dedup. Before inserting a new plan, `_create_plan_sync` queries for an existing `pending`/`processing` plan matching `(user_id, prompt, model_profile)` created within a configurable time window (default 10 minutes, env `PLANEXE_DEDUP_WINDOW_SECONDS`). If found, the existing plan is returned with `deduplicated: true` instead of creating a new one. No schema migration needed — uses existing columns. Set `PLANEXE_DEDUP_WINDOW_SECONDS=0` to disable. -This is a standard idempotency pattern (Stripe, AWS, etc.) and does not change behavior for clients that omit `request_id`. +**Known limitation:** There is a TOCTOU race — if two identical requests arrive concurrently and both pass the dedup check before either commits, a duplicate plan is created. This is accepted; the cost is wasted tokens for one extra plan, which is not worth the complexity of a database-level lock or migration to fix. -**Affected files:** `mcp_cloud/tool_models.py` (new optional field), `mcp_cloud/db_queries.py` (lookup by request_id before insert), DB model (new indexed column). +**Affected files:** `mcp_cloud/db_queries.py` (`_find_recent_duplicate_plan`, `_create_plan_sync`), `mcp_cloud/tool_models.py` (`deduplicated` field on `PlanCreateOutput`), `mcp_cloud/schemas.py` (description and `idempotentHint`), `mcp_cloud/app.py` (re-export). --- @@ -185,7 +189,7 @@ This enables prompt iteration tracking without changing existing behavior for pl | I1 (stopped vs failed) | 87 §4 (deferred) | Needs its own decision — Option A vs B | | I2 (failure diagnostics) | 113 (logs only) | Not surfaced to MCP consumer | | I3 (plan_delete) | None | New | -| I4 (idempotency) | None | New | +| I4 (idempotency) | None | **Implemented** (PR #242) | | I5 (rich SSE events) | 70 §5.1 (basic SSE done) | Events lack structured data | | I6 (download TTL) | 70 §4 (tokens done) | TTL too short, not configurable | | I7 (stall detection) | 87 §8 (partial) | No explicit stall timestamps | @@ -218,6 +222,6 @@ If accepted, I1–I4 and I7–I9 should be added to Proposal 70's quick-win chec | P2 | I6 (download TTL) | Low effort, reduces friction. | | P2 | I5 (rich SSE events) | Eliminates polling for SSE-capable clients. | | P2 | I3 (plan_delete) | Hygiene for multi-plan sessions. | -| P3 | I4 (idempotency) | Defensive, important for production but not urgent. | +| ~~P3~~ | ~~I4 (idempotency)~~ | **Implemented** (PR #242). Server-side auto-dedup on `(user_id, prompt, model_profile)` within a time window. | | P3 | I8 (plan_wait) | Nice-to-have for shell-less agents. | | P3 | I9 (prompt iteration) | Nice-to-have for iteration workflows. | diff --git a/mcp_cloud/app.py b/mcp_cloud/app.py index 0283466a..870271f8 100644 --- a/mcp_cloud/app.py +++ b/mcp_cloud/app.py @@ -55,6 +55,7 @@ find_plan_by_id, get_plan_by_id, resolve_plan_by_id, + _find_recent_duplicate_plan, _create_plan_sync, _get_plan_status_snapshot_sync, _request_plan_stop_sync, diff --git a/mcp_cloud/db_queries.py b/mcp_cloud/db_queries.py index 6f03c3cb..da51aca5 100644 --- a/mcp_cloud/db_queries.py +++ b/mcp_cloud/db_queries.py @@ -1,7 +1,8 @@ """PlanExe MCP Cloud – database query helpers.""" import logging +import os import uuid -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from typing import Any, Optional from flask import has_app_context @@ -17,6 +18,7 @@ logger = logging.getLogger(__name__) PROMPT_EXCERPT_MAX_LENGTH = 100 +DEDUP_WINDOW_SECONDS = int(os.environ.get("PLANEXE_DEDUP_WINDOW_SECONDS", "600")) # --------------------------------------------------------------------------- @@ -71,6 +73,55 @@ def resolve_plan_by_id(plan_id: str) -> Optional[PlanItem]: return find_plan_by_id(plan_id) +# --------------------------------------------------------------------------- +# Dedup helper +# --------------------------------------------------------------------------- + +def _find_recent_duplicate_plan( + user_id: str, + prompt: str, + model_profile: str, + window_seconds: int = DEDUP_WINDOW_SECONDS, +) -> Optional[dict[str, Any]]: + """Return an existing pending/processing plan with the same prompt if created recently. + + Only fetches ``id``, ``timestamp_created``, and ``parameters`` to avoid + loading heavy columns (generated_report_html, run_zip_snapshot, + run_track_activity_jsonl). + + Comparison of *model_profile* is done in Python to avoid JSON column + dialect differences between SQLite and PostgreSQL. + + Returns ``None`` when *window_seconds* <= 0 (opt-out). + """ + if window_seconds <= 0: + return None + + cutoff = datetime.now(UTC) - timedelta(seconds=window_seconds) + + rows = ( + db.session.query( + PlanItem.id, + PlanItem.timestamp_created, + PlanItem.parameters, + ) + .filter( + PlanItem.user_id == user_id, + PlanItem.prompt == prompt, + PlanItem.state.in_([PlanState.pending, PlanState.processing]), + PlanItem.timestamp_created >= cutoff, + ) + .order_by(PlanItem.timestamp_created.desc()) + .all() + ) + + for row in rows: + params = row.parameters if isinstance(row.parameters, dict) else {} + if params.get("model_profile") == model_profile: + return {"id": row.id, "timestamp_created": row.timestamp_created} + return None + + # --------------------------------------------------------------------------- # Sync operations called from handlers via asyncio.to_thread # --------------------------------------------------------------------------- @@ -86,10 +137,33 @@ def _create_plan_sync( parameters["trigger_source"] = "mcp plan_create" parameters["pipeline_version"] = PIPELINE_VERSION + user_id = metadata.get("user_id", "admin") if metadata else "admin" + + # Dedup: return existing plan if an identical request was made recently. + existing = _find_recent_duplicate_plan( + user_id=user_id, + prompt=prompt, + model_profile=parameters["model_profile"], + ) + if existing is not None: + logger.info( + "Deduplicated plan_create for user %s — returning existing plan %s", + user_id, + existing["id"], + ) + created_at = existing["timestamp_created"] + if created_at and created_at.tzinfo is None: + created_at = created_at.replace(tzinfo=UTC) + return { + "plan_id": str(existing["id"]), + "created_at": format_datetime_utc(created_at), + "deduplicated": True, + } + plan = PlanItem( prompt=prompt, state=PlanState.pending, - user_id=metadata.get("user_id", "admin") if metadata else "admin", + user_id=user_id, api_key_id=metadata.get("api_key_id") if metadata else None, parameters=parameters, ) diff --git a/mcp_cloud/schemas.py b/mcp_cloud/schemas.py index d786683a..91049f61 100644 --- a/mcp_cloud/schemas.py +++ b/mcp_cloud/schemas.py @@ -154,7 +154,8 @@ class ToolDefinition: "Optionally, run `curl -N ` in a background shell as a completion detector — " "the stream auto-closes on terminal state (completed/failed). " "If you lose a plan_id, call plan_list to recover it. " - "Each plan_create call creates a new plan_id (no server-side dedup). " + "If the same prompt + model_profile is submitted by the same user within a short window, " + "the existing plan is returned (with deduplicated=true) instead of creating a new one. " "If you are unsure which model_profile to choose, call model_profiles first. " "If your deployment uses credits, include user_api_key to charge the correct account. " "Common error codes: INVALID_USER_API_KEY, USER_API_KEY_REQUIRED, INSUFFICIENT_CREDITS." @@ -164,7 +165,7 @@ class ToolDefinition: annotations={ "readOnlyHint": False, "destructiveHint": False, - "idempotentHint": False, + "idempotentHint": True, "openWorldHint": True, }, ), diff --git a/mcp_cloud/tests/test_plan_create_tool.py b/mcp_cloud/tests/test_plan_create_tool.py index 8f429e4f..aa73bcd0 100644 --- a/mcp_cloud/tests/test_plan_create_tool.py +++ b/mcp_cloud/tests/test_plan_create_tool.py @@ -9,6 +9,17 @@ from mcp_cloud.app import handle_list_tools, handle_plan_create +class StubPlanItem: + def __init__(self, prompt: str, state, user_id: str, parameters, api_key_id=None): + self.id = uuid.uuid4() + self.prompt = prompt + self.state = state + self.user_id = user_id + self.parameters = parameters + self.api_key_id = api_key_id + self.timestamp_created = datetime.now(UTC) + + class TestPlanCreateTool(unittest.TestCase): def test_plan_create_visible_schema_exposes_prompt_and_model_profile(self): tools = asyncio.run(handle_list_tools()) @@ -20,19 +31,13 @@ def test_plan_create_visible_schema_exposes_prompt_and_model_profile(self): def test_plan_create_returns_structured_content(self): arguments = {"prompt": "xcv", "config": None, "metadata": None} fake_session = MagicMock() - class StubPlanItem: - def __init__(self, prompt: str, state, user_id: str, parameters): - self.id = uuid.uuid4() - self.prompt = prompt - self.state = state - self.user_id = user_id - self.parameters = parameters - self.timestamp_created = datetime.now(UTC) with patch("mcp_cloud.db_queries.app.app_context", return_value=nullcontext()), patch( "mcp_cloud.db_queries.db.session", fake_session ), patch( "mcp_cloud.db_queries.PlanItem", StubPlanItem + ), patch( + "mcp_cloud.db_queries._find_recent_duplicate_plan", return_value=None ): result = asyncio.run(handle_plan_create(arguments)) @@ -41,6 +46,45 @@ def __init__(self, prompt: str, state, user_id: str, parameters): self.assertIn("plan_id", result.structuredContent) self.assertIn("created_at", result.structuredContent) self.assertIsInstance(uuid.UUID(result.structuredContent["plan_id"]), uuid.UUID) + # New plan should not have deduplicated key + self.assertNotIn("deduplicated", result.structuredContent) + + def test_plan_create_dedup_returns_existing_plan(self): + """When _find_recent_duplicate_plan returns a plan, plan_create returns it with deduplicated=True.""" + arguments = {"prompt": "build a spaceship", "config": None, "metadata": None} + fake_session = MagicMock() + + existing_id = uuid.uuid4() + existing_dict = { + "id": existing_id, + "timestamp_created": datetime.now(UTC), + } + + with patch("mcp_cloud.db_queries.app.app_context", return_value=nullcontext()), patch( + "mcp_cloud.db_queries.db.session", fake_session + ), patch( + "mcp_cloud.db_queries.PlanItem", StubPlanItem + ), patch( + "mcp_cloud.db_queries._find_recent_duplicate_plan", return_value=existing_dict + ): + result = asyncio.run(handle_plan_create(arguments)) + + self.assertIsInstance(result, CallToolResult) + self.assertIsInstance(result.structuredContent, dict) + self.assertEqual(result.structuredContent["plan_id"], str(existing_id)) + self.assertTrue(result.structuredContent["deduplicated"]) + + def test_find_recent_duplicate_plan_returns_none_when_window_zero(self): + """Opt-out: window_seconds=0 always returns None.""" + from mcp_cloud.db_queries import _find_recent_duplicate_plan + + result = _find_recent_duplicate_plan( + user_id="u1", + prompt="anything", + model_profile="baseline", + window_seconds=0, + ) + self.assertIsNone(result) if __name__ == "__main__": diff --git a/mcp_cloud/tool_models.py b/mcp_cloud/tool_models.py index 8e10be96..7762c225 100644 --- a/mcp_cloud/tool_models.py +++ b/mcp_cloud/tool_models.py @@ -134,6 +134,14 @@ class PlanCreateOutput(BaseModel): description="Plan UUID returned by plan_create. Stable across plan_status/plan_stop/plan_file_info." ) created_at: str + deduplicated: bool | None = Field( + default=None, + description=( + "True when this response returns an existing plan instead of creating a new one " + "(duplicate prompt + model_profile by the same user within the dedup window). " + "Absent or None for newly created plans." + ), + ) sse_url: str | None = Field( default=None, description=(