Claude Code Memory Compiler

2026-04-06 09:26:30 -05:00 · 2026-04-06 09:26:30 -05:00 · f83d38d787
commit f83d38d787
15 changed files with 2819 additions and 0 deletions
--- a/scripts/compile.py
+++ b/scripts/compile.py
@ -0,0 +1,224 @@
+"""
+Compile daily conversation logs into structured knowledge articles.
+
+This is the "LLM compiler" - it reads daily logs (source code) and produces
+organized knowledge articles (the executable).
+
+Usage:
+    uv run python compile.py                    # compile new/changed logs only
+    uv run python compile.py --all              # force recompile everything
+    uv run python compile.py --file daily/2026-04-01.md  # compile a specific log
+    uv run python compile.py --dry-run          # show what would be compiled
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import sys
+from pathlib import Path
+
+from config import AGENTS_FILE, CONCEPTS_DIR, CONNECTIONS_DIR, DAILY_DIR, KNOWLEDGE_DIR, now_iso
+from utils import (
+    file_hash,
+    list_raw_files,
+    list_wiki_articles,
+    load_state,
+    read_wiki_index,
+    save_state,
+)
+
+# ── Paths for the LLM to use ──────────────────────────────────────────
+ROOT_DIR = Path(__file__).resolve().parent.parent
+
+
+async def compile_daily_log(log_path: Path, state: dict) -> float:
+    """Run the LLM compiler over one daily log and record the outcome.
+
+    The agent receives the schema, the current index, every existing article
+    and the log itself, and writes or edits wiki files through its own tools.
+
+    Args:
+        log_path: Daily log file to compile.
+        state: Persistent state mapping. On success the entry for this log
+            under ``"ingested"`` and the running ``"total_cost"`` are updated
+            and the state is saved.
+
+    Returns:
+        The cost reported by the agent's result message, or ``0.0`` when the
+        agent call raised (``state`` is then left untouched).
+    """
+    from claude_agent_sdk import (
+        ClaudeAgentOptions,
+        ResultMessage,
+        query,
+    )
+
+    log_content = log_path.read_text(encoding="utf-8")
+    schema = AGENTS_FILE.read_text(encoding="utf-8")
+    wiki_index = read_wiki_index()
+
+    # Snapshot every existing article, keyed by its path relative to the wiki root.
+    existing = {
+        str(article_path.relative_to(KNOWLEDGE_DIR)): article_path.read_text(encoding="utf-8")
+        for article_path in list_wiki_articles()
+    }
+    # Joining an empty snapshot yields "", which the prompt replaces with a placeholder.
+    existing_articles_context = "\n\n".join(
+        f"### {rel_path}\n```markdown\n{content}\n```"
+        for rel_path, content in existing.items()
+    )
+
+    timestamp = now_iso()
+
+    prompt = f"""You are a knowledge compiler. Your job is to read a daily conversation log
+and extract knowledge into structured wiki articles.
+
+## Schema (AGENTS.md)
+
+{schema}
+
+## Current Wiki Index
+
+{wiki_index}
+
+## Existing Wiki Articles
+
+{existing_articles_context if existing_articles_context else "(No existing articles yet)"}
+
+## Daily Log to Compile
+
+**File:** {log_path.name}
+
+{log_content}
+
+## Your Task
+
+Read the daily log above and compile it into wiki articles following the schema exactly.
+
+### Rules:
+
+1. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article
+2. **Create concept articles** in `knowledge/concepts/` - One .md file per concept
+   - Use the exact article format from AGENTS.md (YAML frontmatter + sections)
+   - Include `sources:` in frontmatter pointing to the daily log file
+   - Use `[[concepts/slug]]` wikilinks to link to related concepts
+   - Write in encyclopedia style - neutral, comprehensive
+3. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious
+   relationships between 2+ existing concepts
+4. **Update existing articles** if this log adds new information to concepts already in the wiki
+   - Read the existing article, add the new information, add the source to frontmatter
+5. **Update knowledge/index.md** - Add new entries to the table
+   - Each entry: `| [[path/slug]] | One-line summary | source-file | {timestamp[:10]} |`
+6. **Append to knowledge/log.md** - Add a timestamped entry:
+   ```
+   ## [{timestamp}] compile | {log_path.name}
+   - Source: daily/{log_path.name}
+   - Articles created: [[concepts/x]], [[concepts/y]]
+   - Articles updated: [[concepts/z]] (if any)
+   ```
+
+### File paths:
+- Write concept articles to: {CONCEPTS_DIR}
+- Write connection articles to: {CONNECTIONS_DIR}
+- Update index at: {KNOWLEDGE_DIR / 'index.md'}
+- Append log at: {KNOWLEDGE_DIR / 'log.md'}
+
+### Quality standards:
+- Every article must have complete YAML frontmatter
+- Every article must link to at least 2 other articles via [[wikilinks]]
+- Key Points section should have 3-5 bullet points
+- Details section should have 2+ paragraphs
+- Related Concepts section should have 2+ entries
+- Sources section should cite the daily log with specific claims extracted
+"""
+
+    cost = 0.0
+
+    try:
+        agent_options = ClaudeAgentOptions(
+            cwd=str(ROOT_DIR),
+            system_prompt={"type": "preset", "preset": "claude_code"},
+            allowed_tools=["Read", "Write", "Edit", "Glob", "Grep"],
+            permission_mode="acceptEdits",
+            max_turns=30,
+        )
+        async for message in query(prompt=prompt, options=agent_options):
+            # Assistant text is ignored on purpose: the agent writes files directly.
+            if not isinstance(message, ResultMessage):
+                continue
+            cost = message.total_cost_usd or 0.0
+            print(f"  Cost: ${cost:.4f}")
+    except Exception as e:
+        print(f"  Error: {e}")
+        return 0.0
+
+    # Record the successful compile; the stored hash is what later runs
+    # compare against to decide whether this log needs recompiling.
+    record = {
+        "hash": file_hash(log_path),
+        "compiled_at": now_iso(),
+        "cost_usd": cost,
+    }
+    state.setdefault("ingested", {})[log_path.name] = record
+    state["total_cost"] = state.get("total_cost", 0.0) + cost
+    save_state(state)
+
+    return cost
+
+
+def main():
+    """CLI entry point: choose which daily logs to compile and compile them in turn.
+
+    ``--file`` compiles one log (looked up by name in the daily directory,
+    then relative to the project root), ``--all`` compiles every log, and the
+    default compiles only logs that are new or whose hash changed.
+    ``--dry-run`` lists the selection without compiling.
+    """
+    parser = argparse.ArgumentParser(description="Compile daily logs into knowledge articles")
+    parser.add_argument("--all", action="store_true", help="Force recompile all logs")
+    parser.add_argument("--file", type=str, help="Compile a specific daily log file")
+    parser.add_argument("--dry-run", action="store_true", help="Show what would be compiled")
+    args = parser.parse_args()
+
+    state = load_state()
+
+    if args.file:
+        target = Path(args.file)
+        if not target.is_absolute():
+            target = DAILY_DIR / target.name
+        if not target.exists():
+            # Fall back to interpreting the argument relative to the project root.
+            target = ROOT_DIR / args.file
+        if not target.exists():
+            print(f"Error: {args.file} not found")
+            sys.exit(1)
+        to_compile = [target]
+    elif args.all:
+        to_compile = list_raw_files()
+    else:
+        ingested = state.get("ingested", {})
+
+        def is_outdated(candidate: Path) -> bool:
+            # Never compiled, or the content hash differs from the recorded one.
+            previous = ingested.get(candidate.name, {})
+            return not previous or previous.get("hash") != file_hash(candidate)
+
+        to_compile = [candidate for candidate in list_raw_files() if is_outdated(candidate)]
+
+    if not to_compile:
+        print("Nothing to compile - all daily logs are up to date.")
+        return
+
+    banner = "[DRY RUN] " if args.dry_run else ""
+    print(f"{banner}Files to compile ({len(to_compile)}):")
+    for pending in to_compile:
+        print(f"  - {pending.name}")
+
+    if args.dry_run:
+        return
+
+    # Logs are compiled one at a time, each in its own event loop run.
+    total_cost = 0.0
+    count = len(to_compile)
+    for position, log_path in enumerate(to_compile, start=1):
+        print(f"\n[{position}/{count}] Compiling {log_path.name}...")
+        total_cost += asyncio.run(compile_daily_log(log_path, state))
+        print("  Done.")
+
+    articles = list_wiki_articles()
+    print(f"\nCompilation complete. Total cost: ${total_cost:.2f}")
+    print(f"Knowledge base: {len(articles)} articles")
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/scripts/config.py
+++ b/scripts/config.py
@ -0,0 +1,33 @@
+"""Path constants and configuration for the personal knowledge base."""
+
+from pathlib import Path
+from datetime import datetime, timezone
+
+# ── Paths ──────────────────────────────────────────────────────────────
+# Every location is derived from the project root, i.e. the parent of the
+# directory that contains this file.
+ROOT_DIR = Path(__file__).resolve().parent.parent
+DAILY_DIR = ROOT_DIR / "daily"
+KNOWLEDGE_DIR = ROOT_DIR / "knowledge"
+CONCEPTS_DIR = KNOWLEDGE_DIR / "concepts"
+CONNECTIONS_DIR = KNOWLEDGE_DIR / "connections"
+QA_DIR = KNOWLEDGE_DIR / "qa"
+REPORTS_DIR = ROOT_DIR / "reports"
+SCRIPTS_DIR = ROOT_DIR / "scripts"
+HOOKS_DIR = ROOT_DIR / "hooks"
+AGENTS_FILE = ROOT_DIR / "AGENTS.md"
+
+# Well-known files inside the knowledge base, plus the persisted state file.
+INDEX_FILE = KNOWLEDGE_DIR / "index.md"
+LOG_FILE = KNOWLEDGE_DIR / "log.md"
+STATE_FILE = SCRIPTS_DIR / "state.json"
+
+# ── Timezone ───────────────────────────────────────────────────────────
+# NOTE(review): now_iso() and today_iso() below call astimezone() with no
+# argument, so they do not read this constant - confirm where it is consumed.
+TIMEZONE = "America/Chicago"
+
+
+def now_iso() -> str:
+    """Return the current local time as an ISO 8601 string with second precision."""
+    local_now = datetime.now(timezone.utc).astimezone()
+    return local_now.isoformat(timespec="seconds")
+
+
+def today_iso() -> str:
+    """Return the current local date formatted as ``YYYY-MM-DD``."""
+    local_now = datetime.now(timezone.utc).astimezone()
+    return local_now.date().isoformat()
--- a/scripts/flush.py
+++ b/scripts/flush.py
@ -0,0 +1,255 @@
+"""
+Memory flush agent - extracts important knowledge from conversation context.
+
+Spawned by session-end.py or pre-compact.py as a background process. Reads
+pre-extracted conversation context from a .md file, uses the Claude Agent SDK
+to decide what's worth saving, and appends the result to today's daily log.
+
+Usage:
+    uv run python flush.py <context_file.md> <session_id>
+"""
+
+from __future__ import annotations
+
+# Recursion prevention: set this BEFORE any imports that might trigger Claude
+import os
+os.environ["CLAUDE_INVOKED_BY"] = "memory_flush"
+
+import asyncio
+import json
+import logging
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Project root: the parent of the directory that contains this script.
+ROOT = Path(__file__).resolve().parent.parent
+DAILY_DIR = ROOT / "daily"
+SCRIPTS_DIR = ROOT / "scripts"
+# Dedup record of the most recent flush (session id + timestamp).
+STATE_FILE = SCRIPTS_DIR / "last-flush.json"
+# Destination of this script's own log output.
+LOG_FILE = SCRIPTS_DIR / "flush.log"
+
+# Set up file-based logging so we can verify the background process ran.
+# The parent process sends stdout/stderr to DEVNULL (to avoid the inherited
+# file handle bug on Windows), so this is our only observability channel.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+
+def load_flush_state() -> dict:
+    """Load the dedup state written by the previous flush.
+
+    Returns:
+        The JSON object stored in ``STATE_FILE``, or an empty dict when the
+        file is missing, unreadable, not valid UTF-8/JSON, or holds something
+        other than a JSON object.
+    """
+    try:
+        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # OSError covers a missing/unreadable file; ValueError covers both
+        # json.JSONDecodeError and UnicodeDecodeError from a corrupt file.
+        return {}
+    # Callers use .get() on the result, so anything but a dict is treated
+    # as "no previous state" instead of crashing later.
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def save_flush_state(state: dict) -> None:
+    """Serialize ``state`` as JSON and overwrite ``STATE_FILE`` with it."""
+    payload = json.dumps(state)
+    STATE_FILE.write_text(payload, encoding="utf-8")
+
+
+def append_to_daily_log(content: str, section: str = "Session") -> None:
+    """Add a ``### <section> (HH:MM)`` entry holding ``content`` to today's daily log.
+
+    When today's log file does not exist yet, the daily directory is created
+    if needed and the file is started with the standard header first.
+    """
+    now_local = datetime.now(timezone.utc).astimezone()
+    date_str = now_local.strftime("%Y-%m-%d")
+    log_path = DAILY_DIR / f"{date_str}.md"
+
+    if not log_path.exists():
+        DAILY_DIR.mkdir(parents=True, exist_ok=True)
+        header = f"# Daily Log: {date_str}\n\n## Sessions\n\n## Memory Maintenance\n\n"
+        log_path.write_text(header, encoding="utf-8")
+
+    clock = now_local.strftime("%H:%M")
+    with log_path.open("a", encoding="utf-8") as log_file:
+        log_file.write(f"### {section} ({clock})\n\n{content}\n\n")
+
+
+async def run_flush(context: str) -> str:
+    """Ask the Claude Agent SDK which parts of ``context`` are worth keeping.
+
+    Returns:
+        The concatenated text of the assistant's reply, or a string starting
+        with ``FLUSH_ERROR:`` when the SDK call raised (the error and its
+        traceback are also written to the log).
+    """
+    from claude_agent_sdk import (
+        AssistantMessage,
+        ClaudeAgentOptions,
+        TextBlock,
+        query,
+    )
+
+    prompt = f"""Review the conversation context below and respond with a concise summary
+of important items that should be preserved in the daily log.
+Do NOT use any tools — just return plain text.
+
+Format your response as a structured daily log entry with these sections:
+
+**Context:** [One line about what the user was working on]
+
+**Key Exchanges:**
+- [Important Q&A or discussions]
+
+**Decisions Made:**
+- [Any decisions with rationale]
+
+**Lessons Learned:**
+- [Gotchas, patterns, or insights discovered]
+
+**Action Items:**
+- [Follow-ups or TODOs mentioned]
+
+Skip anything that is:
+- Routine tool calls or file reads
+- Content that's trivial or obvious
+- Trivial back-and-forth or clarification exchanges
+
+Only include sections that have actual content. If nothing is worth saving,
+respond with exactly: FLUSH_OK
+
+## Conversation Context
+
+{context}"""
+
+    pieces = []
+
+    try:
+        agent_options = ClaudeAgentOptions(
+            cwd=str(ROOT),
+            allowed_tools=[],
+            max_turns=2,
+        )
+        async for message in query(prompt=prompt, options=agent_options):
+            # Only assistant text contributes to the reply; other messages are ignored.
+            if not isinstance(message, AssistantMessage):
+                continue
+            pieces.extend(
+                block.text for block in message.content if isinstance(block, TextBlock)
+            )
+        return "".join(pieces)
+    except Exception as e:
+        import traceback
+        logging.error("Agent SDK error: %s\n%s", e, traceback.format_exc())
+        # Any partial reply is discarded in favour of the error marker.
+        return f"FLUSH_ERROR: {type(e).__name__}: {e}"
+
+
+COMPILE_AFTER_HOUR = 18  # 6 PM local time
+
+
+def maybe_trigger_compilation() -> None:
+    """Spawn compile.py in the background once the local compile hour has passed.
+
+    Nothing is spawned when it is still before ``COMPILE_AFTER_HOUR``, when
+    compile.py is missing, or when state.json already records today's log
+    with a hash equal to the log's current hash. An unreadable or invalid
+    state.json is ignored and compilation is triggered anyway. Spawn failures
+    are logged, never raised.
+    """
+    import subprocess as _sp
+
+    now = datetime.now(timezone.utc).astimezone()
+    if now.hour < COMPILE_AFTER_HOUR:
+        return
+
+    # Check if today's log has already been compiled
+    today_log = f"{now.strftime('%Y-%m-%d')}.md"
+    compile_state_file = SCRIPTS_DIR / "state.json"
+    if compile_state_file.exists():
+        try:
+            compile_state = json.loads(compile_state_file.read_text(encoding="utf-8"))
+            ingested = compile_state.get("ingested", {})
+            if today_log in ingested:
+                # Already compiled today - check if the log has changed since
+                from hashlib import sha256
+                log_path = DAILY_DIR / today_log
+                if log_path.exists():
+                    # Same truncated SHA-256 (16 hex chars) that the compile state stores.
+                    current_hash = sha256(log_path.read_bytes()).hexdigest()[:16]
+                    if ingested[today_log].get("hash") == current_hash:
+                        return  # log unchanged since last compile
+        except (json.JSONDecodeError, OSError):
+            pass
+
+    compile_script = SCRIPTS_DIR / "compile.py"
+    if not compile_script.exists():
+        return
+
+    logging.info("End-of-day compilation triggered (after %d:00)", COMPILE_AFTER_HOUR)
+
+    cmd = ["uv", "run", "--directory", str(ROOT), "python", str(compile_script)]
+
+    # Detach the child so it keeps running after this process exits.
+    kwargs: dict = {}
+    if sys.platform == "win32":
+        kwargs["creationflags"] = _sp.CREATE_NEW_PROCESS_GROUP | _sp.DETACHED_PROCESS
+    else:
+        kwargs["start_new_session"] = True
+
+    try:
+        # The child receives its own copy of the log handle when it is
+        # spawned, so the parent's copy is closed as soon as Popen returns
+        # instead of being leaked.
+        with open(SCRIPTS_DIR / "compile.log", "a", encoding="utf-8") as log_handle:
+            _sp.Popen(cmd, stdout=log_handle, stderr=_sp.STDOUT, cwd=str(ROOT), **kwargs)
+    except Exception as e:
+        logging.error("Failed to spawn compile.py: %s", e)
+
+
+def main():
+    """Flush one session's pre-extracted context into today's daily log.
+
+    Reads ``<context_file.md> <session_id>`` from ``sys.argv``. Skips the
+    flush when the context file is missing or empty, or when the same session
+    was flushed less than 60 seconds ago. Otherwise runs the LLM extraction,
+    appends the outcome to the daily log, records the dedup state, deletes
+    the context file and gives end-of-day compilation a chance to start.
+
+    Exits with status 1 when the arguments are missing.
+    """
+    if len(sys.argv) < 3:
+        logging.error("Usage: %s <context_file.md> <session_id>", sys.argv[0])
+        sys.exit(1)
+
+    context_file = Path(sys.argv[1])
+    session_id = sys.argv[2]
+
+    logging.info("flush.py started for session %s, context: %s", session_id, context_file)
+
+    if not context_file.exists():
+        logging.error("Context file not found: %s", context_file)
+        return
+
+    # Deduplication: skip if same session was flushed within 60 seconds
+    state = load_flush_state()
+    if (
+        state.get("session_id") == session_id
+        and time.time() - state.get("timestamp", 0) < 60
+    ):
+        logging.info("Skipping duplicate flush for session %s", session_id)
+        context_file.unlink(missing_ok=True)
+        return
+
+    # Read pre-extracted context
+    context = context_file.read_text(encoding="utf-8").strip()
+    if not context:
+        logging.info("Context file is empty, skipping")
+        context_file.unlink(missing_ok=True)
+        return
+
+    logging.info("Flushing session %s: %d chars", session_id, len(context))
+
+    # Run the LLM extraction
+    response = asyncio.run(run_flush(context))
+
+    # Classify the reply by its sentinel. The sentinel must be the whole
+    # reply (or its prefix, for errors): a substring test would throw away a
+    # real summary that merely mentions "FLUSH_OK" or "FLUSH_ERROR".
+    verdict = response.strip()
+    if verdict.startswith("FLUSH_ERROR:"):
+        logging.error("Result: %s", response)
+        append_to_daily_log(response, "Memory Flush")
+    elif verdict.strip("`*.") == "FLUSH_OK":
+        # Tolerates light markdown/punctuation around the bare sentinel.
+        logging.info("Result: FLUSH_OK")
+        append_to_daily_log(
+            "FLUSH_OK - Nothing worth saving from this session", "Memory Flush"
+        )
+    else:
+        logging.info("Result: saved to daily log (%d chars)", len(response))
+        append_to_daily_log(response, "Session")
+
+    # Update dedup state
+    save_flush_state({"session_id": session_id, "timestamp": time.time()})
+
+    # Clean up context file
+    context_file.unlink(missing_ok=True)
+
+    # End-of-day auto-compilation: if it's past the compile hour and today's
+    # log hasn't been compiled yet, trigger compile.py in the background.
+    maybe_trigger_compilation()
+
+    logging.info("Flush complete for session %s", session_id)
+
+
+# Run the flush only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/scripts/lint.py
+++ b/scripts/lint.py
@ -0,0 +1,312 @@
+"""
+Lint the knowledge base for structural and semantic health.
+
+Runs 7 checks: broken links, orphan pages, orphan sources, stale articles,
+contradictions (LLM), missing backlinks, and sparse articles.
+
+Usage:
+    uv run python lint.py                    # all checks
+    uv run python lint.py --structural-only  # skip LLM checks (faster, cheaper)
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+from pathlib import Path
+
+from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso
+from utils import (
+    count_inbound_links,
+    extract_wikilinks,
+    file_hash,
+    get_article_word_count,
+    list_raw_files,
+    list_wiki_articles,
+    load_state,
+    read_all_wiki_content,
+    save_state,
+    wiki_article_exists,
+)
+
+ROOT_DIR = Path(__file__).resolve().parent.parent
+
+
+def check_broken_links() -> list[dict]:
+    """Report every [[wikilink]] whose target article is missing.
+
+    Links beginning with ``daily/`` are skipped. Each finding is an
+    ``error``-severity issue attributed to the linking article.
+    """
+    problems = []
+    for article_path in list_wiki_articles():
+        text = article_path.read_text(encoding="utf-8")
+        source = str(article_path.relative_to(KNOWLEDGE_DIR))
+        problems.extend(
+            {
+                "severity": "error",
+                "check": "broken_link",
+                "file": source,
+                "detail": f"Broken link: [[{target}]] - target does not exist",
+            }
+            for target in extract_wikilinks(text)
+            # daily log references are valid
+            if not (target.startswith("daily/") or wiki_article_exists(target))
+        )
+    return problems
+
+
+def check_orphan_pages() -> list[dict]:
+    """Report articles that no other article links to.
+
+    Returns:
+        One ``warning``-severity issue per article whose inbound link count,
+        as reported by ``count_inbound_links``, is zero.
+    """
+    issues = []
+    for article in list_wiki_articles():
+        rel = article.relative_to(KNOWLEDGE_DIR)
+        # Drop only the trailing extension and normalise to forward slashes;
+        # a plain str.replace(".md", "") would also eat ".md" inside a name.
+        link_target = rel.with_suffix("").as_posix()
+        inbound = count_inbound_links(link_target)
+        if inbound == 0:
+            issues.append({
+                "severity": "warning",
+                "check": "orphan_page",
+                "file": str(rel),
+                "detail": f"Orphan page: no other articles link to [[{link_target}]]",
+            })
+    return issues
+
+
+def check_orphan_sources() -> list[dict]:
+    """Report daily logs that have no entry in the compile state's ``ingested`` map."""
+    ingested = load_state().get("ingested", {})
+    return [
+        {
+            "severity": "warning",
+            "check": "orphan_source",
+            "file": f"daily/{log_path.name}",
+            "detail": f"Uncompiled daily log: {log_path.name} has not been ingested",
+        }
+        for log_path in list_raw_files()
+        if log_path.name not in ingested
+    ]
+
+
+def check_stale_articles() -> list[dict]:
+    """Report compiled daily logs whose current hash differs from the recorded one."""
+    ingested = load_state().get("ingested", {})
+    findings = []
+    for log_path in list_raw_files():
+        name = log_path.name
+        if name not in ingested:
+            # Never compiled: not this check's concern.
+            continue
+        recorded = ingested[name].get("hash", "")
+        if recorded == file_hash(log_path):
+            continue
+        findings.append({
+            "severity": "warning",
+            "check": "stale_article",
+            "file": f"daily/{name}",
+            "detail": f"Stale: {name} has changed since last compilation",
+        })
+    return findings
+
+
+def check_missing_backlinks() -> list[dict]:
+    """Report asymmetric links: A links to B but B does not link back to A.
+
+    Links beginning with ``daily/`` and links whose target file does not
+    exist are ignored here.
+
+    Returns:
+        One ``suggestion``-severity, auto-fixable issue per one-way link,
+        attributed to the linking article.
+    """
+    issues = []
+    for article in list_wiki_articles():
+        content = article.read_text(encoding="utf-8")
+        rel = article.relative_to(KNOWLEDGE_DIR)
+        # Drop only the trailing extension and normalise to forward slashes;
+        # a plain str.replace(".md", "") would also eat ".md" inside a name.
+        source_link = rel.with_suffix("").as_posix()
+
+        for link in extract_wikilinks(content):
+            if link.startswith("daily/"):
+                continue
+            target_path = KNOWLEDGE_DIR / f"{link}.md"
+            if target_path.exists():
+                target_content = target_path.read_text(encoding="utf-8")
+                if f"[[{source_link}]]" not in target_content:
+                    issues.append({
+                        "severity": "suggestion",
+                        "check": "missing_backlink",
+                        "file": str(rel),
+                        "detail": f"[[{source_link}]] links to [[{link}]] but not vice versa",
+                        "auto_fixable": True,
+                    })
+    return issues
+
+
+def check_sparse_articles() -> list[dict]:
+    """Report articles whose word count is below 200."""
+    findings = []
+    for article_path in list_wiki_articles():
+        words = get_article_word_count(article_path)
+        if words >= 200:
+            continue
+        findings.append({
+            "severity": "suggestion",
+            "check": "sparse_article",
+            "file": str(article_path.relative_to(KNOWLEDGE_DIR)),
+            "detail": f"Sparse article: {words} words (minimum recommended: 200)",
+        })
+    return findings
+
+
+async def check_contradictions() -> list[dict]:
+    """Ask the LLM to flag contradictions and inconsistencies across articles.
+
+    Returns:
+        One ``warning`` issue per reply line starting with ``CONTRADICTION:``
+        or ``INCONSISTENCY:``; an empty list when the reply contains
+        ``NO_ISSUES``; or a single ``error`` issue when the LLM call raised.
+    """
+    from claude_agent_sdk import (
+        AssistantMessage,
+        ClaudeAgentOptions,
+        ResultMessage,
+        TextBlock,
+        query,
+    )
+
+    wiki_content = read_all_wiki_content()
+
+    prompt = f"""Review this knowledge base for contradictions, inconsistencies, or
+conflicting claims across articles.
+
+## Knowledge Base
+
+{wiki_content}
+
+## Instructions
+
+Look for:
+- Direct contradictions (article A says X, article B says not-X)
+- Inconsistent recommendations (different articles recommend conflicting approaches)
+- Outdated information that conflicts with newer entries
+
+For each issue found, output EXACTLY one line in this format:
+CONTRADICTION: [file1] vs [file2] - description of the conflict
+INCONSISTENCY: [file] - description of the inconsistency
+
+If no issues found, output exactly: NO_ISSUES
+
+Do NOT output anything else - no preamble, no explanation, just the formatted lines."""
+
+    reply = ""
+    try:
+        agent_options = ClaudeAgentOptions(
+            cwd=str(ROOT_DIR),
+            allowed_tools=[],
+            max_turns=2,
+        )
+        async for message in query(prompt=prompt, options=agent_options):
+            if not isinstance(message, AssistantMessage):
+                continue
+            for block in message.content:
+                if isinstance(block, TextBlock):
+                    reply += block.text
+    except Exception as e:
+        return [{"severity": "error", "check": "contradiction", "file": "(system)", "detail": f"LLM check failed: {e}"}]
+
+    if "NO_ISSUES" in reply:
+        return []
+
+    # Keep only lines in the requested format; anything else is dropped.
+    stripped_lines = (raw.strip() for raw in reply.strip().split("\n"))
+    return [
+        {
+            "severity": "warning",
+            "check": "contradiction",
+            "file": "(cross-article)",
+            "detail": line,
+        }
+        for line in stripped_lines
+        if line.startswith(("CONTRADICTION:", "INCONSISTENCY:"))
+    ]
+
+
+def generate_report(all_issues: list[dict]) -> str:
+    """Render the collected lint issues as a markdown report.
+
+    The report starts with per-severity totals, followed by one section per
+    severity that has issues, or a single "all passed" line when there are none.
+    """
+    grouped = {
+        level: [issue for issue in all_issues if issue["severity"] == level]
+        for level in ("error", "warning", "suggestion")
+    }
+
+    report = [
+        f"# Lint Report - {today_iso()}",
+        "",
+        f"**Total issues:** {len(all_issues)}",
+        f"- Errors: {len(grouped['error'])}",
+        f"- Warnings: {len(grouped['warning'])}",
+        f"- Suggestions: {len(grouped['suggestion'])}",
+        "",
+    ]
+
+    sections = (
+        ("Errors", "error", "x"),
+        ("Warnings", "warning", "!"),
+        ("Suggestions", "suggestion", "?"),
+    )
+    for heading, level, marker in sections:
+        bucket = grouped[level]
+        if not bucket:
+            continue
+        report += [f"## {heading}", ""]
+        for issue in bucket:
+            suffix = " (auto-fixable)" if issue.get("auto_fixable") else ""
+            report.append(f"- **[{marker}]** `{issue['file']}` - {issue['detail']}{suffix}")
+        report.append("")
+
+    if not all_issues:
+        report += ["All checks passed. Knowledge base is healthy.", ""]
+
+    return "\n".join(report)
+
+
+def main():
+    """Run the lint checks, write the report file and return an exit code.
+
+    Returns:
+        ``1`` when at least one ``error``-severity issue was found, else ``0``.
+    """
+    parser = argparse.ArgumentParser(description="Lint the knowledge base")
+    parser.add_argument(
+        "--structural-only",
+        action="store_true",
+        help="Skip LLM-based checks (contradictions) - faster and free",
+    )
+    args = parser.parse_args()
+
+    print("Running knowledge base lint checks...")
+    all_issues: list[dict] = []
+
+    # Structural checks (free, instant), run in this order.
+    structural_checks = {
+        "Broken links": check_broken_links,
+        "Orphan pages": check_orphan_pages,
+        "Orphan sources": check_orphan_sources,
+        "Stale articles": check_stale_articles,
+        "Missing backlinks": check_missing_backlinks,
+        "Sparse articles": check_sparse_articles,
+    }
+    for label, run_check in structural_checks.items():
+        print(f"  Checking: {label}...")
+        found = run_check()
+        all_issues += found
+        print(f"    Found {len(found)} issue(s)")
+
+    # LLM check (costs money)
+    if args.structural_only:
+        print("  Skipping: Contradictions (--structural-only)")
+    else:
+        print("  Checking: Contradictions (LLM)...")
+        found = asyncio.run(check_contradictions())
+        all_issues += found
+        print(f"    Found {len(found)} issue(s)")
+
+    # Generate and save report
+    report = generate_report(all_issues)
+    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+    report_path = REPORTS_DIR / f"lint-{today_iso()}.md"
+    report_path.write_text(report, encoding="utf-8")
+    print(f"\nReport saved to: {report_path}")
+
+    # Remember when the last lint ran.
+    state = load_state()
+    state["last_lint"] = now_iso()
+    save_state(state)
+
+    # Summary
+    tally = {"error": 0, "warning": 0, "suggestion": 0}
+    for issue in all_issues:
+        level = issue["severity"]
+        if level in tally:
+            tally[level] += 1
+    print(
+        f"\nResults: {tally['error']} errors, {tally['warning']} warnings, "
+        f"{tally['suggestion']} suggestions"
+    )
+
+    if tally["error"] > 0:
+        print("\nErrors found - knowledge base needs attention!")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    # Propagate main()'s return value as the process exit status. The bare
+    # `exit` helper is installed by the `site` module for interactive use and
+    # is missing under `python -S` or in frozen builds, so raise SystemExit.
+    raise SystemExit(main())
--- a/scripts/query.py
+++ b/scripts/query.py
@ -0,0 +1,138 @@
+"""
+Query the knowledge base using index-guided retrieval (no RAG).
+
+The LLM reads the index, picks relevant articles, and synthesizes an answer.
+No vector database, no embeddings, no chunking - just structured markdown
+and an index the LLM can reason over.
+
+Usage:
+    uv run python query.py "How should I handle auth redirects?"
+    uv run python query.py "What patterns do I use for API design?" --file-back
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+from pathlib import Path
+
+from config import KNOWLEDGE_DIR, QA_DIR, now_iso
+from utils import load_state, read_all_wiki_content, save_state
+
+ROOT_DIR = Path(__file__).resolve().parent.parent
+
+
+async def run_query(question: str, file_back: bool = False) -> str:
+    """Answer ``question`` from the knowledge base, optionally filing the answer back.
+
+    Args:
+        question: The user's question, embedded verbatim in the prompt.
+        file_back: When true, the agent also gets Write/Edit tools and is
+            instructed to store a Q&A article and update the index and log.
+
+    Returns:
+        The concatenated assistant text, or an error message when the agent
+        call raised. The query count and total cost in the persistent state
+        are updated in either case.
+    """
+    from claude_agent_sdk import (
+        AssistantMessage,
+        ClaudeAgentOptions,
+        ResultMessage,
+        TextBlock,
+        query,
+    )
+
+    wiki_content = read_all_wiki_content()
+
+    # Read-only tools by default; write access only when filing the answer back.
+    tools = ["Read", "Glob", "Grep"] + (["Write", "Edit"] if file_back else [])
+
+    if file_back:
+        timestamp = now_iso()
+        file_back_instructions = f"""
+
+## File Back Instructions
+
+After answering, do the following:
+1. Create a Q&A article at {QA_DIR}/ with the filename being a slugified version
+   of the question (e.g., knowledge/qa/how-to-handle-auth-redirects.md)
+2. Use the Q&A article format from the schema (frontmatter with title, question,
+   consulted articles, filed date)
+3. Update {KNOWLEDGE_DIR / 'index.md'} with a new row for this Q&A article
+4. Append to {KNOWLEDGE_DIR / 'log.md'}:
+   ## [{timestamp}] query (filed) | question summary
+   - Question: {question}
+   - Consulted: [[list of articles read]]
+   - Filed to: [[qa/article-name]]
+"""
+    else:
+        file_back_instructions = ""
+
+    prompt = f"""You are a knowledge base query engine. Answer the user's question by
+consulting the knowledge base below.
+
+## How to Answer
+
+1. Read the INDEX section first - it lists every article with a one-line summary
+2. Identify 3-10 articles that are relevant to the question
+3. Read those articles carefully (they're included below)
+4. Synthesize a clear, thorough answer
+5. Cite your sources using [[wikilinks]] (e.g., [[concepts/supabase-auth]])
+6. If the knowledge base doesn't contain relevant information, say so honestly
+
+## Knowledge Base
+
+{wiki_content}
+
+## Question
+
+{question}
+{file_back_instructions}"""
+
+    answer = ""
+    cost = 0.0
+
+    try:
+        agent_options = ClaudeAgentOptions(
+            cwd=str(ROOT_DIR),
+            system_prompt={"type": "preset", "preset": "claude_code"},
+            allowed_tools=tools,
+            permission_mode="acceptEdits",
+            max_turns=15,
+        )
+        async for message in query(prompt=prompt, options=agent_options):
+            if isinstance(message, ResultMessage):
+                cost = message.total_cost_usd or 0.0
+            elif isinstance(message, AssistantMessage):
+                for block in message.content:
+                    if isinstance(block, TextBlock):
+                        answer += block.text
+    except Exception as e:
+        # Any partial answer is replaced by the error text.
+        answer = f"Error querying knowledge base: {e}"
+
+    # Bookkeeping happens whether or not the query succeeded.
+    state = load_state()
+    state["query_count"] = state.get("query_count", 0) + 1
+    state["total_cost"] = state.get("total_cost", 0.0) + cost
+    save_state(state)
+
+    return answer
+
+
+def main():
+    """CLI entry point: print the question, run the query and print the answer.
+
+    With ``--file-back`` a closing line reports how many ``*.md`` files the
+    Q&A directory holds afterwards.
+    """
+    parser = argparse.ArgumentParser(description="Query the personal knowledge base")
+    parser.add_argument("question", help="The question to ask")
+    parser.add_argument(
+        "--file-back",
+        action="store_true",
+        help="File the answer back into the knowledge base as a Q&A article",
+    )
+    args = parser.parse_args()
+
+    separator = "-" * 60
+    file_back_label = "yes" if args.file_back else "no"
+    print(f"Question: {args.question}")
+    print(f"File back: {file_back_label}")
+    print(separator)
+
+    print(asyncio.run(run_query(args.question, file_back=args.file_back)))
+
+    if not args.file_back:
+        return
+
+    print("\n" + separator)
+    qa_count = sum(1 for _ in QA_DIR.glob("*.md")) if QA_DIR.exists() else 0
+    print(f"Answer filed to knowledge/qa/ ({qa_count} Q&A articles total)")
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -0,0 +1,133 @@
+"""Shared utilities for the personal knowledge base."""
+
+import hashlib
+import json
+import re
+from pathlib import Path
+
+from config import (
+    CONCEPTS_DIR,
+    CONNECTIONS_DIR,
+    DAILY_DIR,
+    INDEX_FILE,
+    KNOWLEDGE_DIR,
+    LOG_FILE,
+    QA_DIR,
+    STATE_FILE,
+)
+
+
+# ── State management ──────────────────────────────────────────────────
+
+def load_state() -> dict:
+    """Return the persisted state, or a fresh default when no state file exists.
+
+    The default contains the keys ``ingested``, ``query_count``, ``last_lint``
+    and ``total_cost``. A state file found on disk is returned exactly as
+    parsed from JSON; no keys are added or validated.
+    """
+    if not STATE_FILE.exists():
+        return {"ingested": {}, "query_count": 0, "last_lint": None, "total_cost": 0.0}
+    raw = STATE_FILE.read_text(encoding="utf-8")
+    return json.loads(raw)
+
+
+def save_state(state: dict) -> None:
+    """Write *state* to the state file as 2-space-indented JSON (UTF-8).
+
+    Any existing file content is overwritten.
+    """
+    payload = json.dumps(state, indent=2)
+    STATE_FILE.write_text(payload, encoding="utf-8")
+
+
+# ── File hashing ──────────────────────────────────────────────────────
+
+def file_hash(path: Path) -> str:
+    """Return the first 16 hex characters of the file's SHA-256 digest.
+
+    The whole file is read into memory as bytes before hashing.
+    """
+    hasher = hashlib.sha256()
+    hasher.update(path.read_bytes())
+    full_hex = hasher.hexdigest()
+    return full_hex[:16]
+
+
+# ── Slug / naming ─────────────────────────────────────────────────────
+
+def slugify(text: str) -> str:
+    """Turn arbitrary text into a lowercase, hyphen-separated slug.
+
+    Steps, in order: lowercase and trim; drop every character that is not a
+    word character, whitespace, or hyphen; turn runs of whitespace and
+    underscores into a single hyphen; collapse repeated hyphens; strip
+    hyphens from both ends. The result may be empty.
+    """
+    slug = text.lower().strip()
+    # Order matters: punctuation is removed before separators are normalized.
+    substitutions = (
+        (r"[^\w\s-]", ""),
+        (r"[\s_]+", "-"),
+        (r"-+", "-"),
+    )
+    for pattern, replacement in substitutions:
+        slug = re.sub(pattern, replacement, slug)
+    return slug.strip("-")
+
+
+# ── Wikilink helpers ──────────────────────────────────────────────────
+
+def extract_wikilinks(content: str) -> list[str]:
+    """Return the inner text of every ``[[...]]`` link, in document order.
+
+    The inner text must be non-empty and contain no ``]``; duplicates are
+    kept, and the text is returned as written.
+    """
+    link_pattern = r"\[\[([^\]]+)\]\]"
+    return [match.group(1) for match in re.finditer(link_pattern, content)]
+
+
+def wiki_article_exists(link: str) -> bool:
+    """Report whether ``<link>.md`` exists under the knowledge directory.
+
+    *link* is used as a path relative to ``KNOWLEDGE_DIR`` with ``.md``
+    appended.
+    """
+    return (KNOWLEDGE_DIR / f"{link}.md").exists()
+
+
+# ── Wiki content helpers ──────────────────────────────────────────────
+
+def read_wiki_index() -> str:
+    """Return the index file's text, or an empty index table if it is missing.
+
+    The fallback is a heading followed by a markdown table header with the
+    columns Article, Summary, Compiled From and Updated, and no rows.
+    """
+    if not INDEX_FILE.exists():
+        return "# Knowledge Base Index\n\n| Article | Summary | Compiled From | Updated |\n|---------|---------|---------------|---------|"
+    return INDEX_FILE.read_text(encoding="utf-8")
+
+
+def read_all_wiki_content() -> str:
+    """Concatenate the index and every wiki article into one context string.
+
+    The first section is the index under an ``## INDEX`` heading. Each
+    article follows under a ``## <path relative to the knowledge dir>``
+    heading, in the order produced by ``list_wiki_articles``. Sections are
+    separated by a ``---`` rule surrounded by blank lines.
+    """
+    sections = [f"## INDEX\n\n{read_wiki_index()}"]
+    sections.extend(
+        f"## {article.relative_to(KNOWLEDGE_DIR)}\n\n{article.read_text(encoding='utf-8')}"
+        for article in list_wiki_articles()
+    )
+    return "\n\n---\n\n".join(sections)
+
+
+def list_wiki_articles() -> list[Path]:
+    """Return every ``*.md`` file in the concepts, connections and Q&A dirs.
+
+    Directories are visited in that order and missing ones are skipped;
+    files are sorted within each directory. The search is not recursive.
+    """
+    return [
+        article
+        for folder in (CONCEPTS_DIR, CONNECTIONS_DIR, QA_DIR)
+        if folder.exists()
+        for article in sorted(folder.glob("*.md"))
+    ]
+
+
+def list_raw_files() -> list[Path]:
+    """Return the sorted ``*.md`` files in the daily-log directory.
+
+    Returns an empty list when the directory does not exist.
+    """
+    return sorted(DAILY_DIR.glob("*.md")) if DAILY_DIR.exists() else []
+
+
+# ── Index helpers ─────────────────────────────────────────────────────
+
+def count_inbound_links(target: str, exclude_file: Path | None = None) -> int:
+    """Count the wiki articles that contain the literal link ``[[target]]``.
+
+    Each article counts at most once, however many times it links to the
+    target. Only the exact text ``[[target]]`` is matched.
+
+    Args:
+        target: Link text to look for, without the surrounding brackets.
+        exclude_file: Article path to leave out of the count (typically the
+            target article itself); it is skipped without being read.
+    """
+    needle = f"[[{target}]]"
+    return sum(
+        1
+        for article in list_wiki_articles()
+        if article != exclude_file and needle in article.read_text(encoding="utf-8")
+    )
+
+
+def get_article_word_count(path: Path) -> int:
+    """Count whitespace-separated words in an article, excluding YAML frontmatter.
+
+    Frontmatter is recognized only when the first line is exactly ``---``
+    (trailing whitespace allowed) and a later line is also exactly ``---``.
+    Everything up to and including that closing line is dropped. A ``---``
+    inside a frontmatter value (e.g. ``title: a---b``) is not treated as the
+    closing delimiter. If the frontmatter is never closed, the whole file is
+    counted.
+
+    Args:
+        path: Article file to read as UTF-8.
+
+    Returns:
+        Number of words in the article body.
+    """
+    content = path.read_text(encoding="utf-8")
+    lines = content.splitlines(keepends=True)
+    if lines and lines[0].rstrip() == "---":
+        # Search for the closing delimiter line by line so that "---" embedded
+        # in a frontmatter value cannot end the block early.
+        for idx, line in enumerate(lines[1:], start=1):
+            if line.rstrip() == "---":
+                content = "".join(lines[idx + 1:])
+                break
+    return len(content.split())
+
+
+def build_index_entry(rel_path: str, summary: str, sources: str, updated: str) -> str:
+    """Build a single markdown table row for the knowledge base index.
+
+    Args:
+        rel_path: Article path relative to the knowledge directory, e.g.
+            ``concepts/foo.md``.
+        summary: Text for the Summary column.
+        sources: Text for the Compiled From column.
+        updated: Text for the Updated column.
+
+    Returns:
+        A row of the form ``| [[link]] | summary | sources | updated |``,
+        where ``link`` is *rel_path* without its trailing ``.md``.
+    """
+    # Strip only the trailing extension; a blanket replace would also remove
+    # ".md" occurring in the middle of the path.
+    link = rel_path.removesuffix(".md")
+    return f"| [[{link}]] | {summary} | {sources} | {updated} |"