Claude Code Memory Compiler

2026-04-06 09:26:30 -05:00 · 2026-04-06 09:26:30 -05:00 · f83d38d787
commit f83d38d787
15 changed files with 2819 additions and 0 deletions
--- a/hooks/pre-compact.py
+++ b/hooks/pre-compact.py
@ -0,0 +1,170 @@
+"""
+PreCompact hook - captures conversation transcript before auto-compaction.
+
+When Claude Code's context window fills up, it auto-compacts (summarizes and
+discards detail). This hook fires BEFORE that happens, extracting conversation
+context and spawning flush.py to extract knowledge that would otherwise
+be lost to summarization.
+
+The hook itself does NO API calls - only local file I/O for speed (<10s).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Recursion guard: when CLAUDE_INVOKED_BY is set (flush.py drives the Agent
+# SDK, which runs Claude Code, which would fire this hook again), exit with
+# status 0 before doing any work - this runs before logging is configured.
+if os.environ.get("CLAUDE_INVOKED_BY"):
+    sys.exit(0)
+
+# Project root is the parent of the directory that contains this hook file.
+ROOT = Path(__file__).resolve().parent.parent
+SCRIPTS_DIR = ROOT / "scripts"
+# Extracted context files are written next to the scripts.
+STATE_DIR = SCRIPTS_DIR
+
+# Log to scripts/flush.log; the "[pre-compact]" tag identifies lines written
+# by this hook.
+logging.basicConfig(
+    filename=str(SCRIPTS_DIR / "flush.log"),
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s [pre-compact] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+# Keep at most this many of the most recent user/assistant turns.
+MAX_TURNS = 30
+# Upper bound on the size of the extracted context, in characters.
+MAX_CONTEXT_CHARS = 15_000
+# main() skips the flush when fewer turns than this were extracted.
+MIN_TURNS_TO_FLUSH = 5
+
+
+def extract_conversation_context(transcript_path: Path) -> tuple[str, int]:
+    """Read JSONL transcript and extract last ~N conversation turns as markdown."""
+    turns: list[str] = []
+
+    with open(transcript_path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                entry = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+
+            msg = entry.get("message", {})
+            if isinstance(msg, dict):
+                role = msg.get("role", "")
+                content = msg.get("content", "")
+            else:
+                role = entry.get("role", "")
+                content = entry.get("content", "")
+
+            if role not in ("user", "assistant"):
+                continue
+
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                    elif isinstance(block, str):
+                        text_parts.append(block)
+                content = "\n".join(text_parts)
+
+            if isinstance(content, str) and content.strip():
+                label = "User" if role == "user" else "Assistant"
+                turns.append(f"**{label}:** {content.strip()}\n")
+
+    recent = turns[-MAX_TURNS:]
+    context = "\n".join(recent)
+
+    if len(context) > MAX_CONTEXT_CHARS:
+        context = context[-MAX_CONTEXT_CHARS:]
+        boundary = context.find("\n**")
+        if boundary > 0:
+            context = context[boundary + 1 :]
+
+    return context, len(recent)
+
+
+def main() -> None:
+    # Read hook input from stdin
+    try:
+        raw_input = sys.stdin.read()
+        try:
+            hook_input: dict = json.loads(raw_input)
+        except json.JSONDecodeError:
+            fixed_input = re.sub(r'(?<!\\)\\(?!["\\])', r'\\\\', raw_input)
+            hook_input = json.loads(fixed_input)
+    except (json.JSONDecodeError, ValueError, EOFError) as e:
+        logging.error("Failed to parse stdin: %s", e)
+        return
+
+    session_id = hook_input.get("session_id", "unknown")
+    transcript_path_str = hook_input.get("transcript_path", "")
+
+    logging.info("PreCompact fired: session=%s", session_id)
+
+    # transcript_path can be empty (known Claude Code bug #13668)
+    if not transcript_path_str or not isinstance(transcript_path_str, str):
+        logging.info("SKIP: no transcript path")
+        return
+
+    transcript_path = Path(transcript_path_str)
+    if not transcript_path.exists():
+        logging.info("SKIP: transcript missing: %s", transcript_path_str)
+        return
+
+    # Extract conversation context in the hook
+    try:
+        context, turn_count = extract_conversation_context(transcript_path)
+    except Exception as e:
+        logging.error("Context extraction failed: %s", e)
+        return
+
+    if not context.strip():
+        logging.info("SKIP: empty context")
+        return
+
+    if turn_count < MIN_TURNS_TO_FLUSH:
+        logging.info("SKIP: only %d turns (min %d)", turn_count, MIN_TURNS_TO_FLUSH)
+        return
+
+    # Write context to a temp file for the background process
+    timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S")
+    context_file = STATE_DIR / f"flush-context-{session_id}-{timestamp}.md"
+    context_file.write_text(context, encoding="utf-8")
+
+    # Spawn flush.py as a background process
+    flush_script = SCRIPTS_DIR / "flush.py"
+
+    cmd = [
+        "uv",
+        "run",
+        "--directory",
+        str(ROOT),
+        "python",
+        str(flush_script),
+        str(context_file),
+        session_id,
+    ]
+
+    creation_flags = subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0
+
+    try:
+        subprocess.Popen(
+            cmd,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            creationflags=creation_flags,
+        )
+        logging.info("Spawned flush.py for session %s (%d turns, %d chars)", session_id, turn_count, len(context))
+    except Exception as e:
+        logging.error("Failed to spawn flush.py: %s", e)
+
+
+if __name__ == "__main__":
+    main()
--- a/hooks/session-end.py
+++ b/hooks/session-end.py
@ -0,0 +1,174 @@
+"""
+SessionEnd hook - captures conversation transcript for memory extraction.
+
+When a Claude Code session ends, this hook reads the transcript path from
+stdin, extracts conversation context, and spawns flush.py as a background
+process to extract knowledge into the daily log.
+
+The hook itself does NO API calls - only local file I/O for speed (<10s).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Recursion guard: if we were spawned by flush.py (which calls Agent SDK,
+# which runs Claude Code, which would fire this hook again), exit immediately.
+# This runs before logging is configured, so a guarded exit writes no log line.
+if os.environ.get("CLAUDE_INVOKED_BY"):
+    sys.exit(0)
+
+# Project root is the parent of the directory that contains this hook file.
+ROOT = Path(__file__).resolve().parent.parent
+# Daily log directory; defined here but not used by the code in this hook.
+DAILY_DIR = ROOT / "daily"
+SCRIPTS_DIR = ROOT / "scripts"
+# Extracted context files are written next to the scripts.
+STATE_DIR = SCRIPTS_DIR
+
+# Log to scripts/flush.log; the "[hook]" tag identifies lines written by this
+# hook.
+logging.basicConfig(
+    filename=str(SCRIPTS_DIR / "flush.log"),
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s [hook] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+# Keep at most this many of the most recent user/assistant turns.
+MAX_TURNS = 30
+# Upper bound on the size of the extracted context, in characters.
+MAX_CONTEXT_CHARS = 15_000
+# main() skips the flush when fewer turns than this were extracted.
+MIN_TURNS_TO_FLUSH = 1
+
+
+def extract_conversation_context(transcript_path: Path) -> tuple[str, int]:
+    """Read JSONL transcript and extract last ~N conversation turns as markdown."""
+    turns: list[str] = []
+
+    with open(transcript_path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                entry = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+
+            msg = entry.get("message", {})
+            if isinstance(msg, dict):
+                role = msg.get("role", "")
+                content = msg.get("content", "")
+            else:
+                role = entry.get("role", "")
+                content = entry.get("content", "")
+
+            if role not in ("user", "assistant"):
+                continue
+
+            if isinstance(content, list):
+                text_parts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        text_parts.append(block.get("text", ""))
+                    elif isinstance(block, str):
+                        text_parts.append(block)
+                content = "\n".join(text_parts)
+
+            if isinstance(content, str) and content.strip():
+                label = "User" if role == "user" else "Assistant"
+                turns.append(f"**{label}:** {content.strip()}\n")
+
+    recent = turns[-MAX_TURNS:]
+    context = "\n".join(recent)
+
+    if len(context) > MAX_CONTEXT_CHARS:
+        context = context[-MAX_CONTEXT_CHARS:]
+        boundary = context.find("\n**")
+        if boundary > 0:
+            context = context[boundary + 1 :]
+
+    return context, len(recent)
+
+
+def main() -> None:
+    # Read hook input from stdin
+    # Claude Code on Windows may pass paths with unescaped backslashes
+    try:
+        raw_input = sys.stdin.read()
+        try:
+            hook_input: dict = json.loads(raw_input)
+        except json.JSONDecodeError:
+            fixed_input = re.sub(r'(?<!\\)\\(?!["\\])', r'\\\\', raw_input)
+            hook_input = json.loads(fixed_input)
+    except (json.JSONDecodeError, ValueError, EOFError) as e:
+        logging.error("Failed to parse stdin: %s", e)
+        return
+
+    session_id = hook_input.get("session_id", "unknown")
+    source = hook_input.get("source", "unknown")
+    transcript_path_str = hook_input.get("transcript_path", "")
+
+    logging.info("SessionEnd fired: session=%s source=%s", session_id, source)
+
+    if not transcript_path_str or not isinstance(transcript_path_str, str):
+        logging.info("SKIP: no transcript path")
+        return
+
+    transcript_path = Path(transcript_path_str)
+    if not transcript_path.exists():
+        logging.info("SKIP: transcript missing: %s", transcript_path_str)
+        return
+
+    # Extract conversation context in the hook (fast, no API calls)
+    try:
+        context, turn_count = extract_conversation_context(transcript_path)
+    except Exception as e:
+        logging.error("Context extraction failed: %s", e)
+        return
+
+    if not context.strip():
+        logging.info("SKIP: empty context")
+        return
+
+    if turn_count < MIN_TURNS_TO_FLUSH:
+        logging.info("SKIP: only %d turns (min %d)", turn_count, MIN_TURNS_TO_FLUSH)
+        return
+
+    # Write context to a temp file for the background process
+    timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S")
+    context_file = STATE_DIR / f"session-flush-{session_id}-{timestamp}.md"
+    context_file.write_text(context, encoding="utf-8")
+
+    # Spawn flush.py as a background process
+    flush_script = SCRIPTS_DIR / "flush.py"
+
+    cmd = [
+        "uv",
+        "run",
+        "--directory",
+        str(ROOT),
+        "python",
+        str(flush_script),
+        str(context_file),
+        session_id,
+    ]
+
+    # On Windows, use CREATE_NO_WINDOW to avoid flash console window.
+    # Do NOT use DETACHED_PROCESS — it breaks the Agent SDK's subprocess I/O.
+    creation_flags = subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0
+
+    try:
+        subprocess.Popen(
+            cmd,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            creationflags=creation_flags,
+        )
+        logging.info("Spawned flush.py for session %s (%d turns, %d chars)", session_id, turn_count, len(context))
+    except Exception as e:
+        logging.error("Failed to spawn flush.py: %s", e)
+
+
+if __name__ == "__main__":
+    main()
--- a/hooks/session-start.py
+++ b/hooks/session-start.py
@ -0,0 +1,92 @@
+"""
+SessionStart hook - injects knowledge base context into every conversation.
+
+This is the "context injection" layer. When Claude Code starts a session,
+this hook reads the knowledge base index and recent daily log, then injects
+them as additional context so Claude always "remembers" what it has learned.
+
+Configure in .claude/settings.json:
+{
+    "hooks": {
+        "SessionStart": [{
+            "matcher": "",
+            "hooks": [{
+                "type": "command",
+                "command": "uv run python hooks/session-start.py"
+            }]
+        }]
+    }
+}
+"""
+
+import json
+import sys
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+# Paths relative to project root (the parent of the directory that contains
+# this hook file).
+ROOT = Path(__file__).resolve().parent.parent
+KNOWLEDGE_DIR = ROOT / "knowledge"
+# Daily logs are looked up here as <YYYY-MM-DD>.md.
+DAILY_DIR = ROOT / "daily"
+INDEX_FILE = KNOWLEDGE_DIR / "index.md"
+
+# Upper bound, in characters, on the assembled context before the
+# "...(truncated)" marker is appended.
+MAX_CONTEXT_CHARS = 20_000
+# Number of trailing lines of the daily log that are injected.
+MAX_LOG_LINES = 30
+
+
+def get_recent_log() -> str:
+    """Read the most recent daily log (today or yesterday)."""
+    today = datetime.now(timezone.utc).astimezone()
+
+    for offset in range(2):
+        date = today - timedelta(days=offset)
+        log_path = DAILY_DIR / f"{date.strftime('%Y-%m-%d')}.md"
+        if log_path.exists():
+            lines = log_path.read_text(encoding="utf-8").splitlines()
+            # Return last N lines to keep context small
+            recent = lines[-MAX_LOG_LINES:] if len(lines) > MAX_LOG_LINES else lines
+            return "\n".join(recent)
+
+    return "(no recent daily log)"
+
+
+def build_context() -> str:
+    """Assemble the context to inject into the conversation."""
+    parts = []
+
+    # Today's date
+    today = datetime.now(timezone.utc).astimezone()
+    parts.append(f"## Today\n{today.strftime('%A, %B %d, %Y')}")
+
+    # Knowledge base index (the core retrieval mechanism)
+    if INDEX_FILE.exists():
+        index_content = INDEX_FILE.read_text(encoding="utf-8")
+        parts.append(f"## Knowledge Base Index\n\n{index_content}")
+    else:
+        parts.append("## Knowledge Base Index\n\n(empty - no articles compiled yet)")
+
+    # Recent daily log
+    recent_log = get_recent_log()
+    parts.append(f"## Recent Daily Log\n\n{recent_log}")
+
+    context = "\n\n---\n\n".join(parts)
+
+    # Truncate if too long
+    if len(context) > MAX_CONTEXT_CHARS:
+        context = context[:MAX_CONTEXT_CHARS] + "\n\n...(truncated)"
+
+    return context
+
+
+def main():
+    context = build_context()
+
+    output = {
+        "hookSpecificOutput": {
+            "hookEventName": "SessionStart",
+            "additionalContext": context,
+        }
+    }
+
+    print(json.dumps(output))
+
+
+if __name__ == "__main__":
+    main()