Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions worker_plan/worker_plan_api/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
class FilenameEnum(str, Enum):
START_TIME = "001-1-start_time.json"
INITIAL_PLAN = "001-2-plan.txt"
BOOST_INITIAL_PROMPT_RAW = "001-3-boost_initial_prompt_raw.json"
BOOST_INITIAL_PROMPT_MARKDOWN = "001-4-boost_initial_prompt.md"
REDLINE_GATE_RAW = "002-1-redline_gate.json"
REDLINE_GATE_MARKDOWN = "002-2-redline_gate.md"
PREMISE_ATTACK_RAW = "002-3-premise_attack.json"
Expand Down
154 changes: 154 additions & 0 deletions worker_plan/worker_plan_internal/plan/boost_initial_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""
Pre-planning LLM stage that scores prompt quality and rewrites weak prompts.

Runs before the main pipeline to ensure the initial prompt has enough detail
for PlanExe to produce a high-quality plan. If the prompt scores below a
configurable threshold, the LLM rewrites it as flowing prose with the missing
dimensions filled in from reasonable defaults.

PROMPT> python -m worker_plan_internal.plan.boost_initial_prompt
"""
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Literal

from pydantic import BaseModel, Field
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.llms.llm import LLM
from worker_plan_internal.llm_util.llm_executor import LLMExecutor

logger = logging.getLogger(__name__)

# Minimum acceptable overall prompt-quality score (scale 1-10).
# NOTE(review): this constant is not referenced anywhere in this module — the
# threshold is hard-coded as "6" inside SYSTEM_PROMPT below. Keep the two in
# sync (or interpolate this value into the prompt) — TODO confirm intent.
QUALITY_THRESHOLD = 6  # out of 10; prompts scoring below this get rewritten


class PromptDimensionScore(BaseModel):
    """LLM-assigned score for a single prompt-quality dimension.

    One instance is produced per dimension listed in SYSTEM_PROMPT
    (objective clarity, scope, constraints, stakeholders, success
    criteria, context/background).
    """
    dimension: str = Field(description="Name of the dimension being scored.")
    score: int = Field(description="Score from 1 (absent) to 10 (excellent).", ge=1, le=10)
    note: str = Field(description="Brief note explaining the score.")


class PromptQualityAssessment(BaseModel):
    """Structured verdict returned by the prompt-quality LLM call.

    Filled in by ``llm.as_structured_llm(PromptQualityAssessment)`` inside
    ``BoostInitialPrompt.execute``; the scoring rules live in SYSTEM_PROMPT.
    """
    overall_score: int = Field(
        description="Overall prompt quality score from 1 to 10.", ge=1, le=10
    )
    # One entry per quality dimension described in SYSTEM_PROMPT.
    dimensions: List[PromptDimensionScore] = Field(
        description="Scores for each quality dimension."
    )
    missing_elements: List[str] = Field(
        description="List of elements that are absent or underspecified."
    )
    # Per SYSTEM_PROMPT rules: "needs_boost" is expected when overall_score < 6.
    verdict: Literal["sufficient", "needs_boost"] = Field(
        description="Whether the prompt is sufficient or needs boosting."
    )
    # Expected to be None when verdict is "sufficient" (the model is instructed
    # to set it to null in that case).
    boosted_prompt: Optional[str] = Field(
        None,
        description=(
            "Rewritten prompt as flowing prose (~300-800 words) with missing "
            "dimensions filled in. Only present when verdict is needs_boost."
        ),
    )


# System prompt for the structured-output call; the response schema is
# PromptQualityAssessment.
# NOTE(review): the threshold "6" appears twice in the rules below and is
# duplicated by the QUALITY_THRESHOLD constant at the top of this module —
# the two are not wired together, so changing one without the other will
# silently desynchronize the scoring rules.
SYSTEM_PROMPT = """You are a prompt quality assessor for PlanExe, a strategic project-planning system.

Your job is to evaluate the user's project prompt and decide if it has enough detail for a high-quality 20+ section strategic plan. Score the prompt on these dimensions (1-10 each):

1. **Objective clarity** — Is the goal specific and unambiguous?
2. **Scope definition** — Are boundaries clear (what's included/excluded)?
3. **Constraints** — Are budget, timeline, regulatory, or technical constraints mentioned?
4. **Stakeholders** — Are key stakeholders, beneficiaries, or team roles identified?
5. **Success criteria** — Are measurable outcomes or KPIs defined?
6. **Context/background** — Is enough context given to understand the domain?

Rules:
- Score each dimension 1-10. Compute an overall score as the average rounded to nearest integer.
- If overall_score < 6, set verdict to "needs_boost" and provide a boosted_prompt.
- If overall_score >= 6, set verdict to "sufficient" and set boosted_prompt to null.
- The boosted_prompt must be flowing prose (not markdown with headers or bullets), ~300-800 words.
- Preserve the original intent. Add reasonable defaults for missing dimensions.
- Do NOT change the fundamental project goal; only enrich with specifics.
- Do NOT fabricate domain-specific facts the user didn't mention.
"""


@dataclass
class BoostInitialPrompt:
    """Result of the pre-planning prompt-quality stage.

    Bundles the user's original prompt, the structured LLM assessment, and
    metadata captured from the LLM invocation.
    """
    original_prompt: str  # the prompt exactly as submitted by the user
    assessment: PromptQualityAssessment  # structured verdict from the LLM
    metadata: dict  # LLM metadata captured during the executor run

    @classmethod
    def execute(cls, llm_executor: LLMExecutor, prompt: str) -> 'BoostInitialPrompt':
        """Ask the LLM to score ``prompt`` and, if it is weak, rewrite it.

        Args:
            llm_executor: Executor that runs the callable against an LLM
                (with whatever fallback behavior it implements).
            prompt: The raw user prompt to assess.

        Returns:
            A populated BoostInitialPrompt.
        """
        chat_messages = [
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PROMPT.strip()),
            ChatMessage(role=MessageRole.USER, content=prompt),
        ]

        def execute_function(llm: LLM) -> dict:
            # Constrain the LLM output to the PromptQualityAssessment schema.
            sllm = llm.as_structured_llm(PromptQualityAssessment)
            response = sllm.chat(chat_messages)
            return {"response": response, "metadata": dict(llm.metadata)}

        result = llm_executor.run(execute_function)
        # .raw holds the parsed pydantic object from the structured response.
        assessment: PromptQualityAssessment = result["response"].raw

        return cls(
            original_prompt=prompt,
            assessment=assessment,
            metadata=result.get("metadata", {}),
        )

    @property
    def effective_prompt(self) -> str:
        """Return the boosted prompt if available, otherwise the original."""
        # Guard on both the verdict and the presence of a rewrite: the model
        # may (incorrectly) omit boosted_prompt even when flagging needs_boost.
        if self.assessment.verdict == "needs_boost" and self.assessment.boosted_prompt:
            return self.assessment.boosted_prompt
        return self.original_prompt

    def save_raw(self, file_path: str) -> None:
        """Persist the full result (prompt, assessment, metadata) as JSON."""
        data = {
            "original_prompt": self.original_prompt,
            "assessment": self.assessment.model_dump(),
            "metadata": self.metadata,
        }
        # ensure_ascii=False keeps non-ASCII prompt text readable; the file is
        # written as UTF-8 explicitly.
        Path(file_path).write_text(
            json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
        )

    def save_markdown(self, file_path: str) -> None:
        """Write a human-readable markdown report of the assessment.

        Sections are assembled separately and joined with blank lines, which
        yields consistent spacing and a trailing newline (the previous
        implementation mixed embedded newlines with the join separator,
        producing irregular blank lines and no final newline).
        """
        a = self.assessment
        sections = [
            "# Prompt Quality Assessment",
            f"**Overall Score:** {a.overall_score}/10",
            f"**Verdict:** {a.verdict}",
        ]

        # Dimension table: header rows plus one row per scored dimension.
        table_rows = [
            "| Dimension | Score | Note |",
            "|-----------|-------|------|",
        ] + [f"| {dim.dimension} | {dim.score}/10 | {dim.note} |" for dim in a.dimensions]
        sections.append("## Dimension Scores\n\n" + "\n".join(table_rows))

        if a.missing_elements:
            bullets = "\n".join(f"- {elem}" for elem in a.missing_elements)
            sections.append("## Missing Elements\n\n" + bullets)

        if a.boosted_prompt:
            sections.append("## Boosted Prompt\n\n" + a.boosted_prompt)

        Path(file_path).write_text("\n\n".join(sections) + "\n", encoding="utf-8")


if __name__ == "__main__":
from worker_plan_internal.llm_util.llm_executor import LLMModelFromName

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

test_prompt = "I want to open a coffee shop."
llm_models = LLMModelFromName.from_names(["ollama-llama3.1"])
executor = LLMExecutor(llm_models=llm_models)

result = BoostInitialPrompt.execute(executor, test_prompt)
print(f"Score: {result.assessment.overall_score}/10")
print(f"Verdict: {result.assessment.verdict}")
print(f"Effective prompt: {result.effective_prompt[:200]}...")
25 changes: 25 additions & 0 deletions worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from worker_plan_internal.team.team_markdown_document import TeamMarkdownDocumentBuilder
from worker_plan_internal.team.review_team import ReviewTeam
from worker_plan_internal.self_audit.self_audit import SelfAudit
from worker_plan_internal.plan.boost_initial_prompt import BoostInitialPrompt
from worker_plan_internal.wbs.wbs_task import WBSTask, WBSProject
from worker_plan_internal.wbs.wbs_populate import WBSPopulate
from worker_plan_internal.wbs.wbs_task_tooltip import WBSTaskTooltip
Expand Down Expand Up @@ -209,6 +210,29 @@ def run(self):
raise AssertionError(f"This code is not supposed to be run. Before starting the pipeline the '{FilenameEnum.INITIAL_PLAN.value}' file must be present in the `run_id_dir`: {self.run_id_dir!r}")


class BoostInitialPromptTask(PlanTask):
    """Score the initial prompt quality and rewrite weak prompts before the pipeline runs."""

    def requires(self):
        # Depends only on the setup stage that materializes the initial prompt.
        return self.clone(SetupTask)

    def output(self):
        targets = {
            'raw': self.local_target(FilenameEnum.BOOST_INITIAL_PROMPT_RAW),
            'markdown': self.local_target(FilenameEnum.BOOST_INITIAL_PROMPT_MARKDOWN),
        }
        return targets

    def run_inner(self):
        # NOTE(review): assumes self.input() (SetupTask's output) is a single
        # readable target containing the initial plan prompt — confirm against
        # SetupTask.output().
        with self.input().open("r") as prompt_file:
            initial_prompt = prompt_file.read()

        executor: LLMExecutor = self.create_llm_executor()
        boost = BoostInitialPrompt.execute(executor, initial_prompt)

        outputs = self.output()
        boost.save_raw(outputs['raw'].path)
        boost.save_markdown(outputs['markdown'].path)

class RedlineGateTask(PlanTask):
def requires(self):
return self.clone(SetupTask)
Expand Down Expand Up @@ -3739,6 +3763,7 @@ def requires(self):
return {
'start_time': self.clone(StartTimeTask),
'setup': self.clone(SetupTask),
'boost_initial_prompt': self.clone(BoostInitialPromptTask),
'redline_gate': self.clone(RedlineGateTask),
'premise_attack': self.clone(PremiseAttackTask),
'identify_purpose': self.clone(IdentifyPurposeTask),
Expand Down