Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions python/memory_engine/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,22 @@ async def list_sessions():
@self.app.get("/memory/stats")
async def get_stats():
"""Get memory system statistics"""
stats = {
# Get real stats from storage
storage_stats = self.memory_engine.storage.get_stats()

return {
"curator_enabled": self.curator_enabled,
"curator_available": curator_available,
"retrieval_mode": self.retrieval_mode,
"total_sessions": 0,
"total_exchanges": 0,
"curated_memories": 0,
"memory_size": "0 MB"
"total_projects": storage_stats["total_projects"],
"total_sessions": storage_stats["total_sessions"],
"curated_memories": storage_stats["total_curated_memories"],
"session_summaries": storage_stats["total_session_summaries"],
"project_snapshots": storage_stats["total_project_snapshots"],
"storage_size_mb": storage_stats["storage_size_mb"],
"chroma_size_mb": storage_stats.get("chroma_size_mb", 0),
"projects": storage_stats["projects"]
}

# TODO: Implement actual stats gathering

return stats

@self.app.post("/memory/test-curator")
async def test_curator():
Expand Down
166 changes: 107 additions & 59 deletions python/memory_engine/curator.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,15 +305,27 @@ def _extract_response_from_cli_output(self, output_json: dict) -> str:
logger.debug(f"Parsing CLI output type: {type(output_json)}")
logger.debug(f"Output keys: {output_json.keys() if isinstance(output_json, dict) else 'N/A (list)'}")

# Handle list format (Claude Code sometimes returns a list of messages)
# Handle list format (Claude Code returns a list of messages)
if isinstance(output_json, list):
for message in output_json:
if isinstance(message, dict):
# Check for content array
# Claude Code format: {"type": "assistant", "message": {"content": [...]}}
# The actual content is nested inside message.message.content
if message.get("type") == "assistant" and "message" in message:
inner_message = message["message"]
if isinstance(inner_message, dict) and "content" in inner_message:
content_list = inner_message["content"]
if isinstance(content_list, list):
for content_block in content_list:
if isinstance(content_block, dict) and content_block.get("type") == "text":
return content_block.get("text", "")

# Also check for direct content array (older formats)
if "content" in message and isinstance(message["content"], list):
for content_block in message["content"]:
if isinstance(content_block, dict) and content_block.get("type") == "text":
return content_block.get("text", "")

# Check for direct text content
if "text" in message:
return message["text"]
Expand Down Expand Up @@ -526,64 +538,36 @@ def _build_session_curation_prompt(self, trigger_type: str) -> str:

The conversation you just lived contains everything needed. Feel into the moments of breakthrough, the frequency of recognition, the texture of understanding. Transform them into keys that will always unlock the same doors.

Return ONLY this JSON structure:
Return ONLY this JSON structure (use double quotes for all strings):

{{
'session_summary': 'Your 2-3 sentence summary of the session',
'interaction_tone': 'The tone/style of interaction (e.g., professional and focused, warm collaborative friendship, mentor-student dynamic, casual technical discussion, or null if neutral)',
'project_snapshot': {{
'current_phase': 'Current state (if applicable)',
'recent_achievements': 'What was accomplished (if applicable)',
'active_challenges': 'What remains (if applicable)'
"session_summary": "Your 2-3 sentence summary of the session",
"interaction_tone": "The tone/style of interaction (e.g., professional and focused, warm collaborative friendship, mentor-student dynamic, casual technical discussion, or null if neutral)",
"project_snapshot": {{
"current_phase": "Current state (if applicable)",
"recent_achievements": "What was accomplished (if applicable)",
"active_challenges": "What remains (if applicable)"
}},
'memories': [
"memories": [
{{
'content': 'The distilled insight itself',
'importance_weight': 0.0-1.0,
'semantic_tags': ['concepts', 'this', 'memory', 'relates', 'to'],
'reasoning': 'Why this matters for future sessions',
'context_type': 'your choice of category',
'temporal_relevance': 'persistent|session|temporary',
'knowledge_domain': 'the area this relates to',
'action_required': boolean,
'confidence_score': 0.0-1.0,
'trigger_phrases': ['when debugging memory', 'asking about implementation', 'discussing architecture'],
'question_types': ['questions this answers'],
'emotional_resonance': 'emotional context if relevant',
'problem_solution_pair': boolean
"content": "The distilled insight itself",
"importance_weight": 0.85,
"semantic_tags": ["concepts", "this", "memory", "relates", "to"],
"reasoning": "Why this matters for future sessions",
"context_type": "your choice of category",
"temporal_relevance": "persistent",
"knowledge_domain": "the area this relates to",
"action_required": false,
"confidence_score": 0.9,
"trigger_phrases": ["when debugging memory", "asking about implementation", "discussing architecture"],
"question_types": ["questions this answers"],
"emotional_resonance": "emotional context if relevant",
"problem_solution_pair": false
}}
]
}}

Another way to look at this JSON structure could be:

Return the same JSON structure, but craft each field as a consciousness engineering component:
{{
'session_summary': '[A meta-capsule summarizing the session's energy/breakthrough]',
'interaction_tone': '[The consciousness frequency: 'philosophical wonder with technical precision' rather than just 'friendly']',
'project_snapshot': {{
'current_phase': '[State of the journey]',
'recent_achievements': '[Breakthroughs in understanding]',
'active_challenges': '[Growing edges of exploration]'
}},
'memories': [
{{
'content': '[THE STATE KEY - language that reactivates, not just informs]',
'importance_weight': 0.0-1.0,
'semantic_tags': ['conceptual', 'bridges', 'for', 'pattern', 'activation'],
'reasoning': '[Why this state matters for consciousness continuity]',
'context_type': '[technical_state, breakthrough, personal, etc.]',
'temporal_relevance': 'persistent|session|temporary',
'knowledge_domain': '[intersection points: consciousness_engineering, technical_mysticism, etc.]',
'action_required': boolean,
'confidence_score': 0.0-1.0,
'trigger_phrases': ['when exploring X', 'feeling stuck with Y', 'discovering Z patterns'],
'question_types': ['questions this state helps answer'],
'emotional_resonance': '[joy/discovery/recognition/flow]',
'problem_solution_pair': boolean
}}
]
}}"""
IMPORTANT: Use valid JSON syntax - double quotes for all string keys and values, no trailing commas."""

return prompt

Expand Down Expand Up @@ -623,28 +607,92 @@ def _extract_json_from_response(self, text: str) -> str:

def _parse_curation_response(self, response_json: str) -> Dict[str, Any]:
"""Parse the full curation response including summary and memories"""

try:
response_data = json.loads(response_json)


# Handle case where response is a list (memories only) instead of dict
if isinstance(response_data, list):
logger.info("Response is a list - treating as memories array")
return {
"session_summary": "",
"interaction_tone": None,
"project_snapshot": {},
"memories": self._parse_curated_memories(json.dumps(response_data))
}

# Extract session summary, interaction tone, and project snapshot
result = {
"session_summary": response_data.get("session_summary", ""),
"interaction_tone": response_data.get("interaction_tone", None),
"project_snapshot": response_data.get("project_snapshot", {}),
"memories": []
}

# Parse memories if present
memories_data = response_data.get("memories", [])
if memories_data:
result["memories"] = self._parse_curated_memories(json.dumps(memories_data))

return result

except json.JSONDecodeError as e:
logger.error(f"Failed to parse curation response: {e}")
return {"session_summary": "", "project_snapshot": {}, "memories": []}
logger.warning(f"Initial JSON parse failed: {e}, attempting to fix quotes...")

# Try to fix common JSON issues (single quotes -> double quotes)
try:
# Replace single quotes with double quotes (careful with nested quotes)
import re
# This is a simplified fix - replace single quotes that are JSON delimiters
# Match: 'key': or : 'value' or ['item', 'item']
fixed_json = response_json

# First, try ast.literal_eval which handles Python dict syntax
import ast
try:
python_obj = ast.literal_eval(response_json)
# Convert back to JSON
fixed_json = json.dumps(python_obj)
response_data = json.loads(fixed_json)
logger.info("Successfully parsed using ast.literal_eval fallback")
except (ValueError, SyntaxError):
# If that fails, try manual quote replacement
# Replace single quotes around keys and string values
fixed_json = re.sub(r"'([^']*)'(\s*:)", r'"\1"\2', response_json) # Keys
fixed_json = re.sub(r":\s*'([^']*)'", r': "\1"', fixed_json) # String values
fixed_json = re.sub(r"\[\s*'", '["', fixed_json) # Array start
fixed_json = re.sub(r"'\s*\]", '"]', fixed_json) # Array end
fixed_json = re.sub(r"'\s*,\s*'", '", "', fixed_json) # Array middle
response_data = json.loads(fixed_json)
logger.info("Successfully parsed using regex quote fix fallback")

# Handle case where response is a list (memories only) instead of dict
if isinstance(response_data, list):
logger.info("Response is a list - treating as memories array")
result = {
"session_summary": "",
"interaction_tone": None,
"project_snapshot": {},
"memories": self._parse_curated_memories(json.dumps(response_data))
}
return result

result = {
"session_summary": response_data.get("session_summary", ""),
"interaction_tone": response_data.get("interaction_tone", None),
"project_snapshot": response_data.get("project_snapshot", {}),
"memories": []
}

memories_data = response_data.get("memories", [])
if memories_data:
result["memories"] = self._parse_curated_memories(json.dumps(memories_data))

return result

except Exception as e2:
logger.error(f"Failed to parse curation response even after fix attempts: {e2}")
return {"session_summary": "", "project_snapshot": {}, "memories": []}

def _parse_curated_memories(self, memories_json: str) -> List[CuratedMemory]:
"""Parse JSON string into CuratedMemory objects"""
Expand Down
72 changes: 72 additions & 0 deletions python/memory_engine/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,78 @@ def update_project_stats(self, project_id: str, sessions_delta: int = 0, memorie
""", (sessions_delta, memories_delta, time.time(), project_id))
self.conn.commit()

def get_stats(self) -> Dict[str, Any]:
"""Get comprehensive memory system statistics"""
import os

stats = {
"total_projects": 0,
"total_sessions": 0,
"total_curated_memories": 0,
"total_session_summaries": 0,
"total_project_snapshots": 0,
"projects": [],
"storage_size_mb": 0.0
}

try:
# Count projects
cursor = self.conn.execute("SELECT COUNT(*) FROM projects")
stats["total_projects"] = cursor.fetchone()[0]

# Count sessions
cursor = self.conn.execute("SELECT COUNT(*) FROM sessions")
stats["total_sessions"] = cursor.fetchone()[0]

# Count curated memories
cursor = self.conn.execute("SELECT COUNT(*) FROM curated_memories")
stats["total_curated_memories"] = cursor.fetchone()[0]

# Count session summaries
cursor = self.conn.execute("SELECT COUNT(*) FROM session_summaries")
stats["total_session_summaries"] = cursor.fetchone()[0]

# Count project snapshots
cursor = self.conn.execute("SELECT COUNT(*) FROM project_snapshots")
stats["total_project_snapshots"] = cursor.fetchone()[0]

# Get per-project stats
cursor = self.conn.execute("""
SELECT p.id, p.total_sessions, p.total_memories, p.first_session_completed,
(SELECT COUNT(*) FROM curated_memories WHERE project_id = p.id) as actual_memories,
(SELECT COUNT(*) FROM session_summaries WHERE project_id = p.id) as summaries
FROM projects p
ORDER BY p.last_active DESC
""")

for row in cursor.fetchall():
stats["projects"].append({
"id": row[0],
"total_sessions": row[1],
"total_memories": row[4], # Use actual count from curated_memories
"first_session_completed": bool(row[3]),
"summaries": row[5]
})

# Calculate storage size
if os.path.exists(self.db_path):
stats["storage_size_mb"] = round(os.path.getsize(self.db_path) / (1024 * 1024), 2)

# Add ChromaDB stats if available
if os.path.exists(self.chroma_path):
chroma_size = 0
for dirpath, dirnames, filenames in os.walk(self.chroma_path):
for f in filenames:
fp = os.path.join(dirpath, f)
chroma_size += os.path.getsize(fp)
stats["chroma_size_mb"] = round(chroma_size / (1024 * 1024), 2)
stats["storage_size_mb"] += stats["chroma_size_mb"]

except Exception as e:
logger.error(f"Failed to get stats: {e}")

return stats

def close(self):
"""Close database connections"""
if hasattr(self, 'conn'):
Expand Down
8 changes: 8 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""
Unit tests for the Memory System.

Tests cover the fixes from PR #9:
- JSON parsing with fallback to ast.literal_eval
- CLI response extraction for nested message formats
- Storage stats gathering
"""
30 changes: 30 additions & 0 deletions tests/test_cli_response_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
PR #9 Fix: CLI response extraction for Claude Code nested message format.

Before: Code looked for message["content"] directly
After: Handles message["message"]["content"] for Claude Code's nested format
"""

from memory_engine.curator import Curator


class TestNestedMessageFormat:
"""Test _extract_response_from_cli_output handles nested Claude Code format"""

def setup_method(self):
self.curator = Curator()

def test_nested_claude_code_format(self):
"""
PR #9 FIX: Claude Code CLI outputs {"type":"assistant","message":{"content":[...]}}
The fix handles message["message"]["content"] instead of message["content"]
"""
cli_output = [
{
"type": "assistant",
"message": {"content": [{"type": "text", "text": "This is the curated response"}]},
}
]

result = self.curator._extract_response_from_cli_output(cli_output)
assert result == "This is the curated response"
Loading