Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions worker_plan/worker_plan_api/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
class FilenameEnum(str, Enum):
START_TIME = "001-1-start_time.json"
INITIAL_PLAN = "001-2-plan.txt"
BOOST_INITIAL_PROMPT_RAW = "001-3-boost_initial_prompt_raw.json"
BOOST_INITIAL_PROMPT_MARKDOWN = "001-4-boost_initial_prompt.md"
REDLINE_GATE_RAW = "002-1-redline_gate.json"
REDLINE_GATE_MARKDOWN = "002-2-redline_gate.md"
PREMISE_ATTACK_RAW = "002-3-premise_attack.json"
Expand Down
154 changes: 154 additions & 0 deletions worker_plan/worker_plan_internal/plan/boost_initial_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""
Pre-planning LLM stage that scores prompt quality and rewrites weak prompts.

Runs before the main pipeline to ensure the initial prompt has enough detail
for PlanExe to produce a high-quality plan. If the prompt scores below a
configurable threshold, the LLM rewrites it as flowing prose with the missing
dimensions filled in from reasonable defaults.

PROMPT> python -m worker_plan_internal.plan.boost_initial_prompt
"""
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Literal

from pydantic import BaseModel, Field
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.llms.llm import LLM
from worker_plan_internal.llm_util.llm_executor import LLMExecutor

logger = logging.getLogger(__name__)

# Minimum acceptable overall prompt-quality score (scale 1-10).
# NOTE(review): this constant is not referenced anywhere in this module — the
# threshold is hard-coded as "6" inside SYSTEM_PROMPT below. Keep the two in
# sync (or interpolate this value into the prompt) — TODO confirm intent.
QUALITY_THRESHOLD = 6  # out of 10; prompts scoring below this get rewritten


class PromptDimensionScore(BaseModel):
    """LLM-assigned score for a single prompt-quality dimension.

    One instance is produced per dimension listed in SYSTEM_PROMPT
    (objective clarity, scope, constraints, stakeholders, success
    criteria, context/background).
    """
    dimension: str = Field(description="Name of the dimension being scored.")
    score: int = Field(description="Score from 1 (absent) to 10 (excellent).", ge=1, le=10)
    note: str = Field(description="Brief note explaining the score.")


class PromptQualityAssessment(BaseModel):
    """Structured verdict returned by the prompt-quality LLM call.

    Filled in by ``llm.as_structured_llm(PromptQualityAssessment)`` inside
    ``BoostInitialPrompt.execute``; the scoring rules live in SYSTEM_PROMPT.
    """
    overall_score: int = Field(
        description="Overall prompt quality score from 1 to 10.", ge=1, le=10
    )
    # One entry per quality dimension described in SYSTEM_PROMPT.
    dimensions: List[PromptDimensionScore] = Field(
        description="Scores for each quality dimension."
    )
    missing_elements: List[str] = Field(
        description="List of elements that are absent or underspecified."
    )
    # Per SYSTEM_PROMPT rules: "needs_boost" is expected when overall_score < 6.
    verdict: Literal["sufficient", "needs_boost"] = Field(
        description="Whether the prompt is sufficient or needs boosting."
    )
    # Expected to be None when verdict is "sufficient" (the model is instructed
    # to set it to null in that case).
    boosted_prompt: Optional[str] = Field(
        None,
        description=(
            "Rewritten prompt as flowing prose (~300-800 words) with missing "
            "dimensions filled in. Only present when verdict is needs_boost."
        ),
    )


# System prompt for the structured-output call; the response schema is
# PromptQualityAssessment.
# NOTE(review): the threshold "6" appears twice in the rules below and is
# duplicated by the QUALITY_THRESHOLD constant at the top of this module —
# the two are not wired together, so changing one without the other will
# silently desynchronize the scoring rules.
SYSTEM_PROMPT = """You are a prompt quality assessor for PlanExe, a strategic project-planning system.

Your job is to evaluate the user's project prompt and decide if it has enough detail for a high-quality 20+ section strategic plan. Score the prompt on these dimensions (1-10 each):

1. **Objective clarity** — Is the goal specific and unambiguous?
2. **Scope definition** — Are boundaries clear (what's included/excluded)?
3. **Constraints** — Are budget, timeline, regulatory, or technical constraints mentioned?
4. **Stakeholders** — Are key stakeholders, beneficiaries, or team roles identified?
5. **Success criteria** — Are measurable outcomes or KPIs defined?
6. **Context/background** — Is enough context given to understand the domain?

Rules:
- Score each dimension 1-10. Compute an overall score as the average rounded to nearest integer.
- If overall_score < 6, set verdict to "needs_boost" and provide a boosted_prompt.
- If overall_score >= 6, set verdict to "sufficient" and set boosted_prompt to null.
- The boosted_prompt must be flowing prose (not markdown with headers or bullets), ~300-800 words.
- Preserve the original intent. Add reasonable defaults for missing dimensions.
- Do NOT change the fundamental project goal; only enrich with specifics.
- Do NOT fabricate domain-specific facts the user didn't mention.
"""


@dataclass
class BoostInitialPrompt:
    """Result of the pre-planning prompt-quality stage.

    Bundles the user's original prompt, the structured LLM assessment, and
    metadata captured from the LLM invocation.
    """
    original_prompt: str  # the prompt exactly as submitted by the user
    assessment: PromptQualityAssessment  # structured verdict from the LLM
    metadata: dict  # LLM metadata captured during the executor run

    @classmethod
    def execute(cls, llm_executor: LLMExecutor, prompt: str) -> 'BoostInitialPrompt':
        """Ask the LLM to score ``prompt`` and, if it is weak, rewrite it.

        Args:
            llm_executor: Executor that runs the callable against an LLM
                (with whatever fallback behavior it implements).
            prompt: The raw user prompt to assess.

        Returns:
            A populated BoostInitialPrompt.
        """
        chat_messages = [
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PROMPT.strip()),
            ChatMessage(role=MessageRole.USER, content=prompt),
        ]

        def execute_function(llm: LLM) -> dict:
            # Constrain the LLM output to the PromptQualityAssessment schema.
            sllm = llm.as_structured_llm(PromptQualityAssessment)
            response = sllm.chat(chat_messages)
            return {"response": response, "metadata": dict(llm.metadata)}

        result = llm_executor.run(execute_function)
        # .raw holds the parsed pydantic object from the structured response.
        assessment: PromptQualityAssessment = result["response"].raw

        return cls(
            original_prompt=prompt,
            assessment=assessment,
            metadata=result.get("metadata", {}),
        )

    @property
    def effective_prompt(self) -> str:
        """Return the boosted prompt if available, otherwise the original."""
        # Guard on both the verdict and the presence of a rewrite: the model
        # may (incorrectly) omit boosted_prompt even when flagging needs_boost.
        if self.assessment.verdict == "needs_boost" and self.assessment.boosted_prompt:
            return self.assessment.boosted_prompt
        return self.original_prompt

    def save_raw(self, file_path: str) -> None:
        """Persist the full result (prompt, assessment, metadata) as JSON."""
        data = {
            "original_prompt": self.original_prompt,
            "assessment": self.assessment.model_dump(),
            "metadata": self.metadata,
        }
        # ensure_ascii=False keeps non-ASCII prompt text readable; the file is
        # written as UTF-8 explicitly.
        Path(file_path).write_text(
            json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
        )

    def save_markdown(self, file_path: str) -> None:
        """Write a human-readable markdown report of the assessment.

        Sections are assembled separately and joined with blank lines, which
        yields consistent spacing and a trailing newline (the previous
        implementation mixed embedded newlines with the join separator,
        producing irregular blank lines and no final newline).
        """
        a = self.assessment
        sections = [
            "# Prompt Quality Assessment",
            f"**Overall Score:** {a.overall_score}/10",
            f"**Verdict:** {a.verdict}",
        ]

        # Dimension table: header rows plus one row per scored dimension.
        table_rows = [
            "| Dimension | Score | Note |",
            "|-----------|-------|------|",
        ] + [f"| {dim.dimension} | {dim.score}/10 | {dim.note} |" for dim in a.dimensions]
        sections.append("## Dimension Scores\n\n" + "\n".join(table_rows))

        if a.missing_elements:
            bullets = "\n".join(f"- {elem}" for elem in a.missing_elements)
            sections.append("## Missing Elements\n\n" + bullets)

        if a.boosted_prompt:
            sections.append("## Boosted Prompt\n\n" + a.boosted_prompt)

        Path(file_path).write_text("\n\n".join(sections) + "\n", encoding="utf-8")


if __name__ == "__main__":
from worker_plan_internal.llm_util.llm_executor import LLMModelFromName

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

test_prompt = "I want to open a coffee shop."
llm_models = LLMModelFromName.from_names(["ollama-llama3.1"])
executor = LLMExecutor(llm_models=llm_models)

result = BoostInitialPrompt.execute(executor, test_prompt)
print(f"Score: {result.assessment.overall_score}/10")
print(f"Verdict: {result.assessment.verdict}")
print(f"Effective prompt: {result.effective_prompt[:200]}...")
25 changes: 25 additions & 0 deletions worker_plan/worker_plan_internal/plan/run_plan_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from worker_plan_internal.team.team_markdown_document import TeamMarkdownDocumentBuilder
from worker_plan_internal.team.review_team import ReviewTeam
from worker_plan_internal.self_audit.self_audit import SelfAudit
from worker_plan_internal.plan.boost_initial_prompt import BoostInitialPrompt
from worker_plan_internal.wbs.wbs_task import WBSTask, WBSProject
from worker_plan_internal.wbs.wbs_populate import WBSPopulate
from worker_plan_internal.wbs.wbs_task_tooltip import WBSTaskTooltip
Expand Down Expand Up @@ -209,6 +210,29 @@ def run(self):
raise AssertionError(f"This code is not supposed to be run. Before starting the pipeline the '{FilenameEnum.INITIAL_PLAN.value}' file must be present in the `run_id_dir`: {self.run_id_dir!r}")


class BoostInitialPromptTask(PlanTask):
    """Score the initial prompt quality and rewrite weak prompts before the pipeline runs."""

    def requires(self):
        # Depends only on the setup stage that materializes the initial prompt.
        return self.clone(SetupTask)

    def output(self):
        targets = {
            'raw': self.local_target(FilenameEnum.BOOST_INITIAL_PROMPT_RAW),
            'markdown': self.local_target(FilenameEnum.BOOST_INITIAL_PROMPT_MARKDOWN),
        }
        return targets

    def run_inner(self):
        # NOTE(review): assumes self.input() (SetupTask's output) is a single
        # readable target containing the initial plan prompt — confirm against
        # SetupTask.output().
        with self.input().open("r") as prompt_file:
            initial_prompt = prompt_file.read()

        executor: LLMExecutor = self.create_llm_executor()
        boost = BoostInitialPrompt.execute(executor, initial_prompt)

        outputs = self.output()
        boost.save_raw(outputs['raw'].path)
        boost.save_markdown(outputs['markdown'].path)

class RedlineGateTask(PlanTask):
def requires(self):
return self.clone(SetupTask)
Expand Down Expand Up @@ -3739,6 +3763,7 @@ def requires(self):
return {
'start_time': self.clone(StartTimeTask),
'setup': self.clone(SetupTask),
'boost_initial_prompt': self.clone(BoostInitialPromptTask),
'redline_gate': self.clone(RedlineGateTask),
'premise_attack': self.clone(PremiseAttackTask),
'identify_purpose': self.clone(IdentifyPurposeTask),
Expand Down