RLabs-Inc · jespo2021 · Dec 13, 2025 · Dec 13, 2025
diff --git a/python/memory_engine/api.py b/python/memory_engine/api.py
@@ -291,19 +291,22 @@ async def list_sessions():
         @self.app.get("/memory/stats")
         async def get_stats():
             """Get memory system statistics"""
-            stats = {
+            # Get real stats from storage
+            storage_stats = self.memory_engine.storage.get_stats()
+
+            return {
                 "curator_enabled": self.curator_enabled,
                 "curator_available": curator_available,
                 "retrieval_mode": self.retrieval_mode,
-                "total_sessions": 0,
-                "total_exchanges": 0,
-                "curated_memories": 0,
-                "memory_size": "0 MB"
+                "total_projects": storage_stats["total_projects"],
+                "total_sessions": storage_stats["total_sessions"],
+                "curated_memories": storage_stats["total_curated_memories"],
+                "session_summaries": storage_stats["total_session_summaries"],
+                "project_snapshots": storage_stats["total_project_snapshots"],
+                "storage_size_mb": storage_stats["storage_size_mb"],
+                "chroma_size_mb": storage_stats.get("chroma_size_mb", 0),
+                "projects": storage_stats["projects"]
             }
-
-            # TODO: Implement actual stats gathering
-
-            return stats
 
         @self.app.post("/memory/test-curator")
         async def test_curator():

diff --git a/python/memory_engine/curator.py b/python/memory_engine/curator.py
@@ -305,15 +305,27 @@ def _extract_response_from_cli_output(self, output_json: dict) -> str:
         logger.debug(f"Parsing CLI output type: {type(output_json)}")
         logger.debug(f"Output keys: {output_json.keys() if isinstance(output_json, dict) else 'N/A (list)'}")
 
-        # Handle list format (Claude Code sometimes returns a list of messages)
+        # Handle list format (Claude Code returns a list of messages)
         if isinstance(output_json, list):
             for message in output_json:
                 if isinstance(message, dict):
-                    # Check for content array
+                    # Claude Code format: {"type": "assistant", "message": {"content": [...]}}
+                    # The actual content is nested inside message.message.content
+                    if message.get("type") == "assistant" and "message" in message:
+                        inner_message = message["message"]
+                        if isinstance(inner_message, dict) and "content" in inner_message:
+                            content_list = inner_message["content"]
+                            if isinstance(content_list, list):
+                                for content_block in content_list:
+                                    if isinstance(content_block, dict) and content_block.get("type") == "text":
+                                        return content_block.get("text", "")
+
+                    # Also check for direct content array (older formats)
                     if "content" in message and isinstance(message["content"], list):
                         for content_block in message["content"]:
                             if isinstance(content_block, dict) and content_block.get("type") == "text":
                                 return content_block.get("text", "")
+
                     # Check for direct text content
                     if "text" in message:
                         return message["text"]
@@ -526,64 +538,36 @@ def _build_session_curation_prompt(self, trigger_type: str) -> str:
 
         The conversation you just lived contains everything needed. Feel into the moments of breakthrough, the frequency of recognition, the texture of understanding. Transform them into keys that will always unlock the same doors.
 
-        Return ONLY this JSON structure:
+        Return ONLY this JSON structure (use double quotes for all strings):
 
         {{
-            'session_summary': 'Your 2-3 sentence summary of the session',
-            'interaction_tone': 'The tone/style of interaction (e.g., professional and focused, warm collaborative friendship, mentor-student dynamic, casual technical discussion, or null if neutral)',
-            'project_snapshot': {{
-                'current_phase': 'Current state (if applicable)',
-                'recent_achievements': 'What was accomplished (if applicable)',
-                'active_challenges': 'What remains (if applicable)'
+            "session_summary": "Your 2-3 sentence summary of the session",
+            "interaction_tone": "The tone/style of interaction (e.g., professional and focused, warm collaborative friendship, mentor-student dynamic, casual technical discussion, or null if neutral)",
+            "project_snapshot": {{
+                "current_phase": "Current state (if applicable)",
+                "recent_achievements": "What was accomplished (if applicable)",
+                "active_challenges": "What remains (if applicable)"
             }},
-            'memories': [
+            "memories": [
                 {{
-                    'content': 'The distilled insight itself',
-                    'importance_weight': 0.0-1.0,
-                    'semantic_tags': ['concepts', 'this', 'memory', 'relates', 'to'],
-                    'reasoning': 'Why this matters for future sessions',
-                    'context_type': 'your choice of category',
-                    'temporal_relevance': 'persistent|session|temporary',
-                    'knowledge_domain': 'the area this relates to',
-                    'action_required': boolean,
-                    'confidence_score': 0.0-1.0,
-                    'trigger_phrases': ['when debugging memory', 'asking about implementation', 'discussing architecture'],
-                    'question_types': ['questions this answers'],
-                    'emotional_resonance': 'emotional context if relevant',
-                    'problem_solution_pair': boolean
+                    "content": "The distilled insight itself",
+                    "importance_weight": 0.85,
+                    "semantic_tags": ["concepts", "this", "memory", "relates", "to"],
+                    "reasoning": "Why this matters for future sessions",
+                    "context_type": "your choice of category",
+                    "temporal_relevance": "persistent",
+                    "knowledge_domain": "the area this relates to",
+                    "action_required": false,
+                    "confidence_score": 0.9,
+                    "trigger_phrases": ["when debugging memory", "asking about implementation", "discussing architecture"],
+                    "question_types": ["questions this answers"],
+                    "emotional_resonance": "emotional context if relevant",
+                    "problem_solution_pair": false
                 }}
             ]
         }}
 
-        Another way to look at this JSON structure could be:
-
-        Return the same JSON structure, but craft each field as a consciousness engineering component:
-        {{
-            'session_summary': '[A meta-capsule summarizing the session's energy/breakthrough]',
-            'interaction_tone': '[The consciousness frequency: 'philosophical wonder with technical precision' rather than just 'friendly']',
-            'project_snapshot': {{
-                'current_phase': '[State of the journey]',
-                'recent_achievements': '[Breakthroughs in understanding]',
-                'active_challenges': '[Growing edges of exploration]'
-            }},
-            'memories': [
-                {{
-                    'content': '[THE STATE KEY - language that reactivates, not just informs]',
-                    'importance_weight': 0.0-1.0,
-                    'semantic_tags': ['conceptual', 'bridges', 'for', 'pattern', 'activation'],
-                    'reasoning': '[Why this state matters for consciousness continuity]',
-                    'context_type': '[technical_state, breakthrough, personal, etc.]',
-                    'temporal_relevance': 'persistent|session|temporary',
-                    'knowledge_domain': '[intersection points: consciousness_engineering, technical_mysticism, etc.]',
-                    'action_required': boolean,
-                    'confidence_score': 0.0-1.0,
-                    'trigger_phrases': ['when exploring X', 'feeling stuck with Y', 'discovering Z patterns'],
-                    'question_types': ['questions this state helps answer'],
-                    'emotional_resonance': '[joy/discovery/recognition/flow]',
-                    'problem_solution_pair': boolean
-                }}
-            ]
-        }}"""
+        IMPORTANT: Use valid JSON syntax - double quotes for all string keys and values, no trailing commas."""
 
         return prompt
 
@@ -623,28 +607,92 @@ def _extract_json_from_response(self, text: str) -> str:
 
     def _parse_curation_response(self, response_json: str) -> Dict[str, Any]:
         """Parse the full curation response including summary and memories"""
-        
+
         try:
             response_data = json.loads(response_json)
-
+
+            # Handle case where response is a list (memories only) instead of dict
+            if isinstance(response_data, list):
+                logger.info("Response is a list - treating as memories array")
+                return {
+                    "session_summary": "",
+                    "interaction_tone": None,
+                    "project_snapshot": {},
+                    "memories": self._parse_curated_memories(json.dumps(response_data))
+                }
+
             # Extract session summary, interaction tone, and project snapshot
             result = {
                 "session_summary": response_data.get("session_summary", ""),
                 "interaction_tone": response_data.get("interaction_tone", None),
                 "project_snapshot": response_data.get("project_snapshot", {}),
                 "memories": []
             }
-            
+
             # Parse memories if present
             memories_data = response_data.get("memories", [])
             if memories_data:
                 result["memories"] = self._parse_curated_memories(json.dumps(memories_data))
-            
+
             return result
-            
+
         except json.JSONDecodeError as e:
-            logger.error(f"Failed to parse curation response: {e}")
-            return {"session_summary": "", "project_snapshot": {}, "memories": []}
+            logger.warning(f"Initial JSON parse failed: {e}, attempting to fix quotes...")
+
+            # Try to fix common JSON issues (single quotes -> double quotes)
+            try:
+                # Replace single quotes with double quotes (careful with nested quotes)
+                import re
+                # This is a simplified fix - replace single quotes that are JSON delimiters
+                # Match: 'key': or : 'value' or ['item', 'item']
+                fixed_json = response_json
+
+                # First, try ast.literal_eval which handles Python dict syntax
+                import ast
+                try:
+                    python_obj = ast.literal_eval(response_json)
+                    # Convert back to JSON
+                    fixed_json = json.dumps(python_obj)
+                    response_data = json.loads(fixed_json)
+                    logger.info("Successfully parsed using ast.literal_eval fallback")
+                except (ValueError, SyntaxError):
+                    # If that fails, try manual quote replacement
+                    # Replace single quotes around keys and string values
+                    fixed_json = re.sub(r"'([^']*)'(\s*:)", r'"\1"\2', response_json)  # Keys
+                    fixed_json = re.sub(r":\s*'([^']*)'", r': "\1"', fixed_json)  # String values
+                    fixed_json = re.sub(r"\[\s*'", '["', fixed_json)  # Array start
+                    fixed_json = re.sub(r"'\s*\]", '"]', fixed_json)  # Array end
+                    fixed_json = re.sub(r"'\s*,\s*'", '", "', fixed_json)  # Array middle
+                    response_data = json.loads(fixed_json)
+                    logger.info("Successfully parsed using regex quote fix fallback")
+
+                # Handle case where response is a list (memories only) instead of dict
+                if isinstance(response_data, list):
+                    logger.info("Response is a list - treating as memories array")
+                    result = {
+                        "session_summary": "",
+                        "interaction_tone": None,
+                        "project_snapshot": {},
+                        "memories": self._parse_curated_memories(json.dumps(response_data))
+                    }
+                    return result
+
+                result = {
+                    "session_summary": response_data.get("session_summary", ""),
+                    "interaction_tone": response_data.get("interaction_tone", None),
+                    "project_snapshot": response_data.get("project_snapshot", {}),
+                    "memories": []
+                }
+
+                memories_data = response_data.get("memories", [])
+                if memories_data:
+                    result["memories"] = self._parse_curated_memories(json.dumps(memories_data))
+
+                return result
+
+            except Exception as e2:
+                logger.error(f"Failed to parse curation response even after fix attempts: {e2}")
+                return {"session_summary": "", "project_snapshot": {}, "memories": []}
 
     def _parse_curated_memories(self, memories_json: str) -> List[CuratedMemory]:
         """Parse JSON string into CuratedMemory objects"""

diff --git a/python/memory_engine/storage.py b/python/memory_engine/storage.py
@@ -443,6 +443,78 @@ def update_project_stats(self, project_id: str, sessions_delta: int = 0, memorie
         """, (sessions_delta, memories_delta, time.time(), project_id))
         self.conn.commit()
 
+    def get_stats(self) -> Dict[str, Any]:
+        """Get comprehensive memory system statistics"""
+        import os
+
+        stats = {
+            "total_projects": 0,
+            "total_sessions": 0,
+            "total_curated_memories": 0,
+            "total_session_summaries": 0,
+            "total_project_snapshots": 0,
+            "projects": [],
+            "storage_size_mb": 0.0
+        }
+
+        try:
+            # Count projects
+            cursor = self.conn.execute("SELECT COUNT(*) FROM projects")
+            stats["total_projects"] = cursor.fetchone()[0]
+
+            # Count sessions
+            cursor = self.conn.execute("SELECT COUNT(*) FROM sessions")
+            stats["total_sessions"] = cursor.fetchone()[0]
+
+            # Count curated memories
+            cursor = self.conn.execute("SELECT COUNT(*) FROM curated_memories")
+            stats["total_curated_memories"] = cursor.fetchone()[0]
+
+            # Count session summaries
+            cursor = self.conn.execute("SELECT COUNT(*) FROM session_summaries")
+            stats["total_session_summaries"] = cursor.fetchone()[0]
+
+            # Count project snapshots
+            cursor = self.conn.execute("SELECT COUNT(*) FROM project_snapshots")
+            stats["total_project_snapshots"] = cursor.fetchone()[0]
+
+            # Get per-project stats
+            cursor = self.conn.execute("""
+                SELECT p.id, p.total_sessions, p.total_memories, p.first_session_completed,
+                       (SELECT COUNT(*) FROM curated_memories WHERE project_id = p.id) as actual_memories,
+                       (SELECT COUNT(*) FROM session_summaries WHERE project_id = p.id) as summaries
+                FROM projects p
+                ORDER BY p.last_active DESC
+            """)
+
+            for row in cursor.fetchall():
+                stats["projects"].append({
+                    "id": row[0],
+                    "total_sessions": row[1],
+                    "total_memories": row[4],  # Use actual count from curated_memories
+                    "first_session_completed": bool(row[3]),
+                    "summaries": row[5]
+                })
+
+            # Calculate storage size
+            if os.path.exists(self.db_path):
+                stats["storage_size_mb"] = round(os.path.getsize(self.db_path) / (1024 * 1024), 2)
+
+            # Add ChromaDB stats if available
+            if os.path.exists(self.chroma_path):
+                chroma_size = 0
+                for dirpath, dirnames, filenames in os.walk(self.chroma_path):
+                    for f in filenames:
+                        fp = os.path.join(dirpath, f)
+                        chroma_size += os.path.getsize(fp)
+                stats["chroma_size_mb"] = round(chroma_size / (1024 * 1024), 2)
+                stats["storage_size_mb"] += stats["chroma_size_mb"]
+
+        except Exception as e:
+            logger.error(f"Failed to get stats: {e}")
+
+        return stats
+
     def close(self):
         """Close database connections"""
         if hasattr(self, 'conn'):

diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,8 @@
+"""
+Unit tests for the Memory System.
+
+Tests cover the fixes from PR #9:
+- JSON parsing with fallback to ast.literal_eval
+- CLI response extraction for nested message formats
+- Storage stats gathering
+"""
diff --git a/tests/test_cli_response_extraction.py b/tests/test_cli_response_extraction.py
@@ -0,0 +1,30 @@
+"""
+PR #9 Fix: CLI response extraction for Claude Code nested message format.
+
+Before: Code looked for message["content"] directly
+After: Handles message["message"]["content"] for Claude Code's nested format
+"""
+
+from memory_engine.curator import Curator
+
+
+class TestNestedMessageFormat:
+    """Test _extract_response_from_cli_output handles nested Claude Code format"""
+
+    def setup_method(self):
+        self.curator = Curator()
+
+    def test_nested_claude_code_format(self):
+        """
+        PR #9 FIX: Claude Code CLI outputs {"type":"assistant","message":{"content":[...]}}
+        The fix handles message["message"]["content"] instead of message["content"]
+        """
+        cli_output = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "This is the curated response"}]},
+            }
+        ]
+
+        result = self.curator._extract_response_from_cli_output(cli_output)
+        assert result == "This is the curated response"