Claude Code Memory Compiler

This commit is contained in:
Cole Medin 2026-04-06 09:26:30 -05:00
commit f83d38d787
15 changed files with 2819 additions and 0 deletions

170
hooks/pre-compact.py Normal file
View file

@ -0,0 +1,170 @@
"""
PreCompact hook - captures conversation transcript before auto-compaction.
When Claude Code's context window fills up, it auto-compacts (summarizes and
discards detail). This hook fires BEFORE that happens, extracting conversation
context and spawning flush.py to extract knowledge that would otherwise
be lost to summarization.
The hook itself does NO API calls - only local file I/O for speed (<10s).
"""
from __future__ import annotations
import json
import logging
import os
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
# Recursion guard: when CLAUDE_INVOKED_BY is set in the environment, exit
# right away -- before the log file is opened or any other work is done.
if os.environ.get("CLAUDE_INVOKED_BY"):
    sys.exit(0)

# Project root is the parent of the directory that contains this hook file.
ROOT = Path(__file__).resolve().parent.parent
SCRIPTS_DIR = ROOT / "scripts"
# Context files handed to flush.py are written next to the scripts.
STATE_DIR = SCRIPTS_DIR

# Log to scripts/flush.log; the "[pre-compact]" tag marks this hook's entries.
logging.basicConfig(
    filename=str(SCRIPTS_DIR / "flush.log"),
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [pre-compact] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

MAX_TURNS = 30  # keep at most this many of the most recent turns
MAX_CONTEXT_CHARS = 15_000  # character cap applied to the extracted context
MIN_TURNS_TO_FLUSH = 5  # main() skips the flush when fewer turns were extracted
def extract_conversation_context(transcript_path: Path) -> tuple[str, int]:
"""Read JSONL transcript and extract last ~N conversation turns as markdown."""
turns: list[str] = []
with open(transcript_path, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
try:
entry = json.loads(line)
except json.JSONDecodeError:
continue
msg = entry.get("message", {})
if isinstance(msg, dict):
role = msg.get("role", "")
content = msg.get("content", "")
else:
role = entry.get("role", "")
content = entry.get("content", "")
if role not in ("user", "assistant"):
continue
if isinstance(content, list):
text_parts = []
for block in content:
if isinstance(block, dict) and block.get("type") == "text":
text_parts.append(block.get("text", ""))
elif isinstance(block, str):
text_parts.append(block)
content = "\n".join(text_parts)
if isinstance(content, str) and content.strip():
label = "User" if role == "user" else "Assistant"
turns.append(f"**{label}:** {content.strip()}\n")
recent = turns[-MAX_TURNS:]
context = "\n".join(recent)
if len(context) > MAX_CONTEXT_CHARS:
context = context[-MAX_CONTEXT_CHARS:]
boundary = context.find("\n**")
if boundary > 0:
context = context[boundary + 1 :]
return context, len(recent)
def main() -> None:
    """Handle a PreCompact event: snapshot the transcript and spawn flush.py.

    Reads the hook payload (a JSON object) from stdin, extracts the recent
    conversation turns from the transcript it names, writes them to a context
    file in ``STATE_DIR`` and starts ``flush.py`` in the background with that
    file and the session id as arguments. Expected failures (bad payload,
    missing transcript, too few turns, write or spawn errors) are logged to
    the hook log and the function returns without raising.
    """
    # Read hook input from stdin
    try:
        raw_input = sys.stdin.read()
        try:
            hook_input = json.loads(raw_input)
        except json.JSONDecodeError:
            # Retry once after doubling every backslash that is neither
            # preceded by a backslash nor followed by a quote or backslash
            # (e.g. unescaped backslashes in a Windows path).
            fixed_input = re.sub(r'(?<!\\)\\(?!["\\])', r'\\\\', raw_input)
            hook_input = json.loads(fixed_input)
    except (json.JSONDecodeError, ValueError, EOFError) as e:
        logging.error("Failed to parse stdin: %s", e)
        return
    if not isinstance(hook_input, dict):
        # Valid JSON but not an object: there is nothing to call .get() on.
        logging.error(
            "Failed to parse stdin: expected a JSON object, got %s",
            type(hook_input).__name__,
        )
        return

    # Coerce to str: the id is used in a file name and as a process argument.
    session_id = str(hook_input.get("session_id") or "unknown")
    transcript_path_str = hook_input.get("transcript_path", "")
    logging.info("PreCompact fired: session=%s", session_id)

    # transcript_path can be empty (known Claude Code bug #13668)
    if not transcript_path_str or not isinstance(transcript_path_str, str):
        logging.info("SKIP: no transcript path")
        return
    transcript_path = Path(transcript_path_str)
    if not transcript_path.exists():
        logging.info("SKIP: transcript missing: %s", transcript_path_str)
        return

    # Extract conversation context in the hook
    try:
        context, turn_count = extract_conversation_context(transcript_path)
    except Exception as e:
        logging.error("Context extraction failed: %s", e)
        return
    if not context.strip():
        logging.info("SKIP: empty context")
        return
    if turn_count < MIN_TURNS_TO_FLUSH:
        logging.info("SKIP: only %d turns (min %d)", turn_count, MIN_TURNS_TO_FLUSH)
        return

    # Write context to a temp file for the background process
    timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S")
    # Keep path separators and other odd characters out of the file name.
    safe_id = re.sub(r"[^A-Za-z0-9._-]", "_", session_id)
    context_file = STATE_DIR / f"flush-context-{safe_id}-{timestamp}.md"
    try:
        context_file.write_text(context, encoding="utf-8")
    except OSError as e:
        logging.error("Failed to write context file %s: %s", context_file, e)
        return

    # Spawn flush.py as a background process
    flush_script = SCRIPTS_DIR / "flush.py"
    cmd = [
        "uv",
        "run",
        "--directory",
        str(ROOT),
        "python",
        str(flush_script),
        str(context_file),
        session_id,
    ]
    # CREATE_NO_WINDOW only exists on Windows, so it is looked up only there.
    creation_flags = subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0
    try:
        subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            creationflags=creation_flags,
        )
    except Exception as e:
        logging.error("Failed to spawn flush.py: %s", e)
        # flush.py never started, so no process was handed this file.
        try:
            context_file.unlink()
        except OSError as cleanup_error:
            logging.warning("Could not remove %s: %s", context_file, cleanup_error)
    else:
        logging.info("Spawned flush.py for session %s (%d turns, %d chars)", session_id, turn_count, len(context))


if __name__ == "__main__":
    main()

174
hooks/session-end.py Normal file
View file

@ -0,0 +1,174 @@
"""
SessionEnd hook - captures conversation transcript for memory extraction.
When a Claude Code session ends, this hook reads the transcript path from
stdin, extracts conversation context, and spawns flush.py as a background
process to extract knowledge into the daily log.
The hook itself does NO API calls - only local file I/O for speed (<10s).
"""
from __future__ import annotations
import json
import logging
import os
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
# Recursion guard: if we were spawned by flush.py (which calls Agent SDK,
# which runs Claude Code, which would fire this hook again), exit immediately.
# This runs before the log file is opened or any other work is done.
if os.environ.get("CLAUDE_INVOKED_BY"):
    sys.exit(0)

# Project root is the parent of the directory that contains this hook file.
ROOT = Path(__file__).resolve().parent.parent
# NOTE(review): DAILY_DIR is not referenced by the code shown in this hook;
# presumably flush.py writes the daily logs there -- confirm.
DAILY_DIR = ROOT / "daily"
SCRIPTS_DIR = ROOT / "scripts"
# Context files handed to flush.py are written next to the scripts.
STATE_DIR = SCRIPTS_DIR

# Log to scripts/flush.log; the "[hook]" tag marks this hook's entries.
logging.basicConfig(
    filename=str(SCRIPTS_DIR / "flush.log"),
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [hook] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

MAX_TURNS = 30  # keep at most this many of the most recent turns
MAX_CONTEXT_CHARS = 15_000  # character cap applied to the extracted context
MIN_TURNS_TO_FLUSH = 1  # main() skips the flush when fewer turns were extracted
def extract_conversation_context(transcript_path: Path) -> tuple[str, int]:
"""Read JSONL transcript and extract last ~N conversation turns as markdown."""
turns: list[str] = []
with open(transcript_path, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
try:
entry = json.loads(line)
except json.JSONDecodeError:
continue
msg = entry.get("message", {})
if isinstance(msg, dict):
role = msg.get("role", "")
content = msg.get("content", "")
else:
role = entry.get("role", "")
content = entry.get("content", "")
if role not in ("user", "assistant"):
continue
if isinstance(content, list):
text_parts = []
for block in content:
if isinstance(block, dict) and block.get("type") == "text":
text_parts.append(block.get("text", ""))
elif isinstance(block, str):
text_parts.append(block)
content = "\n".join(text_parts)
if isinstance(content, str) and content.strip():
label = "User" if role == "user" else "Assistant"
turns.append(f"**{label}:** {content.strip()}\n")
recent = turns[-MAX_TURNS:]
context = "\n".join(recent)
if len(context) > MAX_CONTEXT_CHARS:
context = context[-MAX_CONTEXT_CHARS:]
boundary = context.find("\n**")
if boundary > 0:
context = context[boundary + 1 :]
return context, len(recent)
def main() -> None:
    """Handle a SessionEnd event: snapshot the transcript and spawn flush.py.

    Reads the hook payload (a JSON object) from stdin, extracts the recent
    conversation turns from the transcript it names, writes them to a context
    file in ``STATE_DIR`` and starts ``flush.py`` in the background with that
    file and the session id as arguments. Expected failures (bad payload,
    missing transcript, too few turns, write or spawn errors) are logged to
    the hook log and the function returns without raising.
    """
    # Read hook input from stdin
    # Claude Code on Windows may pass paths with unescaped backslashes
    try:
        raw_input = sys.stdin.read()
        try:
            hook_input = json.loads(raw_input)
        except json.JSONDecodeError:
            # Retry once after doubling every backslash that is neither
            # preceded by a backslash nor followed by a quote or backslash.
            fixed_input = re.sub(r'(?<!\\)\\(?!["\\])', r'\\\\', raw_input)
            hook_input = json.loads(fixed_input)
    except (json.JSONDecodeError, ValueError, EOFError) as e:
        logging.error("Failed to parse stdin: %s", e)
        return
    if not isinstance(hook_input, dict):
        # Valid JSON but not an object: there is nothing to call .get() on.
        logging.error(
            "Failed to parse stdin: expected a JSON object, got %s",
            type(hook_input).__name__,
        )
        return

    # Coerce to str: the id is used in a file name and as a process argument.
    session_id = str(hook_input.get("session_id") or "unknown")
    source = hook_input.get("source", "unknown")
    transcript_path_str = hook_input.get("transcript_path", "")
    logging.info("SessionEnd fired: session=%s source=%s", session_id, source)

    if not transcript_path_str or not isinstance(transcript_path_str, str):
        logging.info("SKIP: no transcript path")
        return
    transcript_path = Path(transcript_path_str)
    if not transcript_path.exists():
        logging.info("SKIP: transcript missing: %s", transcript_path_str)
        return

    # Extract conversation context in the hook (fast, no API calls)
    try:
        context, turn_count = extract_conversation_context(transcript_path)
    except Exception as e:
        logging.error("Context extraction failed: %s", e)
        return
    if not context.strip():
        logging.info("SKIP: empty context")
        return
    if turn_count < MIN_TURNS_TO_FLUSH:
        logging.info("SKIP: only %d turns (min %d)", turn_count, MIN_TURNS_TO_FLUSH)
        return

    # Write context to a temp file for the background process
    timestamp = datetime.now(timezone.utc).astimezone().strftime("%Y%m%d-%H%M%S")
    # Keep path separators and other odd characters out of the file name.
    safe_id = re.sub(r"[^A-Za-z0-9._-]", "_", session_id)
    context_file = STATE_DIR / f"session-flush-{safe_id}-{timestamp}.md"
    try:
        context_file.write_text(context, encoding="utf-8")
    except OSError as e:
        logging.error("Failed to write context file %s: %s", context_file, e)
        return

    # Spawn flush.py as a background process
    flush_script = SCRIPTS_DIR / "flush.py"
    cmd = [
        "uv",
        "run",
        "--directory",
        str(ROOT),
        "python",
        str(flush_script),
        str(context_file),
        session_id,
    ]
    # On Windows, use CREATE_NO_WINDOW to avoid flash console window.
    # Do NOT use DETACHED_PROCESS — it breaks the Agent SDK's subprocess I/O.
    creation_flags = subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0
    try:
        subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            creationflags=creation_flags,
        )
    except Exception as e:
        logging.error("Failed to spawn flush.py: %s", e)
        # flush.py never started, so no process was handed this file.
        try:
            context_file.unlink()
        except OSError as cleanup_error:
            logging.warning("Could not remove %s: %s", context_file, cleanup_error)
    else:
        logging.info("Spawned flush.py for session %s (%d turns, %d chars)", session_id, turn_count, len(context))


if __name__ == "__main__":
    main()

92
hooks/session-start.py Normal file
View file

@ -0,0 +1,92 @@
"""
SessionStart hook - injects knowledge base context into every conversation.
This is the "context injection" layer. When Claude Code starts a session,
this hook reads the knowledge base index and recent daily log, then injects
them as additional context so Claude always "remembers" what it has learned.
Configure in .claude/settings.json:
    {
      "hooks": {
        "SessionStart": [{
          "matcher": "",
          "hooks": [{
            "type": "command",
            "command": "uv run python hooks/session-start.py"
          }]
        }]
      }
    }
"""
import json
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
# Paths relative to project root
# (the root is the parent of the directory that contains this hook file)
ROOT = Path(__file__).resolve().parent.parent
KNOWLEDGE_DIR = ROOT / "knowledge"
DAILY_DIR = ROOT / "daily"  # get_recent_log() looks for YYYY-MM-DD.md files here
INDEX_FILE = KNOWLEDGE_DIR / "index.md"  # knowledge base index read by build_context()

MAX_CONTEXT_CHARS = 20_000  # build_context() cuts its result to this many characters
MAX_LOG_LINES = 30  # get_recent_log() returns at most this many trailing lines
def get_recent_log() -> str:
    """Return the tail of the newest daily log, checking today then yesterday.

    The first ``DAILY_DIR/YYYY-MM-DD.md`` file that exists (local date) wins;
    at most ``MAX_LOG_LINES`` of its trailing lines are returned, joined with
    newlines. When neither file exists a placeholder string is returned.
    """
    now_local = datetime.now(timezone.utc).astimezone()
    for day in (now_local, now_local - timedelta(days=1)):
        candidate = DAILY_DIR / (day.strftime("%Y-%m-%d") + ".md")
        if not candidate.exists():
            continue
        log_lines = candidate.read_text(encoding="utf-8").splitlines()
        # Keep only the tail so the injected context stays small.
        if len(log_lines) > MAX_LOG_LINES:
            log_lines = log_lines[-MAX_LOG_LINES:]
        return "\n".join(log_lines)
    return "(no recent daily log)"
def build_context() -> str:
    """Build the markdown text injected at session start.

    Three sections -- today's date, the knowledge base index (or a
    placeholder when ``INDEX_FILE`` is missing) and the recent daily log --
    are joined with horizontal rules. A result longer than
    ``MAX_CONTEXT_CHARS`` is cut to that length and a truncation marker is
    appended.
    """
    date_line = datetime.now(timezone.utc).astimezone().strftime("%A, %B %d, %Y")

    # Knowledge base index (the core retrieval mechanism)
    if INDEX_FILE.exists():
        index_body = INDEX_FILE.read_text(encoding="utf-8")
    else:
        index_body = "(empty - no articles compiled yet)"

    sections = [
        f"## Today\n{date_line}",
        f"## Knowledge Base Index\n\n{index_body}",
        f"## Recent Daily Log\n\n{get_recent_log()}",
    ]
    assembled = "\n\n---\n\n".join(sections)

    if len(assembled) <= MAX_CONTEXT_CHARS:
        return assembled
    return assembled[:MAX_CONTEXT_CHARS] + "\n\n...(truncated)"
def main():
    """Write the SessionStart hook response, as one JSON line, to stdout."""
    payload = {
        "hookSpecificOutput": {
            "hookEventName": "SessionStart",
            "additionalContext": build_context(),
        }
    }
    sys.stdout.write(json.dumps(payload) + "\n")


if __name__ == "__main__":
    main()