Claude Code Memory Compiler
This commit is contained in:
commit
f83d38d787
15 changed files with 2819 additions and 0 deletions
224
scripts/compile.py
Normal file
224
scripts/compile.py
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
"""
|
||||
Compile daily conversation logs into structured knowledge articles.
|
||||
|
||||
This is the "LLM compiler" - it reads daily logs (source code) and produces
|
||||
organized knowledge articles (the executable).
|
||||
|
||||
Usage:
|
||||
uv run python compile.py # compile new/changed logs only
|
||||
uv run python compile.py --all # force recompile everything
|
||||
uv run python compile.py --file daily/2026-04-01.md # compile a specific log
|
||||
uv run python compile.py --dry-run # show what would be compiled
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from config import AGENTS_FILE, CONCEPTS_DIR, CONNECTIONS_DIR, DAILY_DIR, KNOWLEDGE_DIR, now_iso
|
||||
from utils import (
|
||||
file_hash,
|
||||
list_raw_files,
|
||||
list_wiki_articles,
|
||||
load_state,
|
||||
read_wiki_index,
|
||||
save_state,
|
||||
)
|
||||
|
||||
# ── Paths for the LLM to use ──────────────────────────────────────────
|
||||
ROOT_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
async def compile_daily_log(log_path: Path, state: dict) -> float:
    """Compile a single daily log into knowledge articles.

    Builds one prompt from the schema file, the wiki index, every existing
    article and the daily log, then runs a Claude agent that writes the
    resulting articles to disk itself through its file tools.

    Args:
        log_path: Daily log to compile.
        state: Compiler state. On success ``state["ingested"][log_path.name]``
            and ``state["total_cost"]`` are updated in place and the state is
            persisted via ``save_state``.

    Returns:
        The API cost (USD) reported by the agent's result message, or ``0.0``
        when the agent run raised; in that case ``state`` is not modified.
    """
    from claude_agent_sdk import ClaudeAgentOptions, ResultMessage, query

    # Fingerprint the log BEFORE reading it and before the long agent run.
    # The log may be appended to while compilation is in progress; hashing
    # afterwards would record content the agent never saw, and the next
    # incremental run would wrongly consider the log up to date.
    source_hash = file_hash(log_path)
    log_content = log_path.read_text(encoding="utf-8")
    schema = AGENTS_FILE.read_text(encoding="utf-8")
    wiki_index = read_wiki_index()

    # Inline every existing article so the agent can update instead of duplicate
    article_sections = []
    for article_path in list_wiki_articles():
        rel = article_path.relative_to(KNOWLEDGE_DIR)
        content = article_path.read_text(encoding="utf-8")
        article_sections.append(f"### {rel}\n```markdown\n{content}\n```")
    existing_articles_context = "\n\n".join(article_sections)
    articles_section = existing_articles_context or "(No existing articles yet)"

    timestamp = now_iso()

    prompt = f"""You are a knowledge compiler. Your job is to read a daily conversation log
and extract knowledge into structured wiki articles.

## Schema (AGENTS.md)

{schema}

## Current Wiki Index

{wiki_index}

## Existing Wiki Articles

{articles_section}

## Daily Log to Compile

**File:** {log_path.name}

{log_content}

## Your Task

Read the daily log above and compile it into wiki articles following the schema exactly.

### Rules:

1. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article
2. **Create concept articles** in `knowledge/concepts/` - One .md file per concept
- Use the exact article format from AGENTS.md (YAML frontmatter + sections)
- Include `sources:` in frontmatter pointing to the daily log file
- Use `[[concepts/slug]]` wikilinks to link to related concepts
- Write in encyclopedia style - neutral, comprehensive
3. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious
relationships between 2+ existing concepts
4. **Update existing articles** if this log adds new information to concepts already in the wiki
- Read the existing article, add the new information, add the source to frontmatter
5. **Update knowledge/index.md** - Add new entries to the table
- Each entry: `| [[path/slug]] | One-line summary | source-file | {timestamp[:10]} |`
6. **Append to knowledge/log.md** - Add a timestamped entry:
```
## [{timestamp}] compile | {log_path.name}
- Source: daily/{log_path.name}
- Articles created: [[concepts/x]], [[concepts/y]]
- Articles updated: [[concepts/z]] (if any)
```

### File paths:
- Write concept articles to: {CONCEPTS_DIR}
- Write connection articles to: {CONNECTIONS_DIR}
- Update index at: {KNOWLEDGE_DIR / 'index.md'}
- Append log at: {KNOWLEDGE_DIR / 'log.md'}

### Quality standards:
- Every article must have complete YAML frontmatter
- Every article must link to at least 2 other articles via [[wikilinks]]
- Key Points section should have 3-5 bullet points
- Details section should have 2+ paragraphs
- Related Concepts section should have 2+ entries
- Sources section should cite the daily log with specific claims extracted
"""

    cost = 0.0

    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(ROOT_DIR),
                system_prompt={"type": "preset", "preset": "claude_code"},
                allowed_tools=["Read", "Write", "Edit", "Glob", "Grep"],
                permission_mode="acceptEdits",
                max_turns=30,
            ),
        ):
            # The agent writes files directly, so assistant text is ignored;
            # only the final result message (carrying the cost) matters here.
            if isinstance(message, ResultMessage):
                cost = message.total_cost_usd or 0.0
                print(f" Cost: ${cost:.4f}")
    except Exception as e:
        # Best effort: report and leave state untouched so the log is retried
        print(f" Error: {e}")
        return 0.0

    # Record the log as ingested, keyed by file name
    state.setdefault("ingested", {})[log_path.name] = {
        "hash": source_hash,
        "compiled_at": now_iso(),
        "cost_usd": cost,
    }
    state["total_cost"] = state.get("total_cost", 0.0) + cost
    save_state(state)

    return cost
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: pick the daily logs to compile and run them.

    Selection rules:
      * ``--file`` compiles exactly one log (looked up in the daily directory
        by file name, then relative to the project root);
      * ``--all`` compiles every raw log;
      * otherwise only logs that are new or whose hash differs from the
        recorded one are compiled.

    ``--dry-run`` prints the selection and stops. Exits with status 1 when
    ``--file`` names a log that cannot be found.
    """
    parser = argparse.ArgumentParser(description="Compile daily logs into knowledge articles")
    parser.add_argument("--all", action="store_true", help="Force recompile all logs")
    parser.add_argument("--file", type=str, help="Compile a specific daily log file")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be compiled")
    args = parser.parse_args()

    state = load_state()

    if args.file:
        target = Path(args.file)
        if not target.is_absolute():
            target = DAILY_DIR / target.name
        if not target.exists():
            # Fall back to interpreting the argument relative to the project root
            target = ROOT_DIR / args.file
        if not target.exists():
            print(f"Error: {args.file} not found")
            sys.exit(1)
        to_compile = [target]
    elif args.all:
        to_compile = list_raw_files()
    else:

        def is_outdated(path):
            """True when the log was never ingested or its hash has changed."""
            previous = state.get("ingested", {}).get(path.name, {})
            return not previous or previous.get("hash") != file_hash(path)

        to_compile = [path for path in list_raw_files() if is_outdated(path)]

    if not to_compile:
        print("Nothing to compile - all daily logs are up to date.")
        return

    banner = "[DRY RUN] " if args.dry_run else ""
    print(f"{banner}Files to compile ({len(to_compile)}):")
    for pending in to_compile:
        print(f" - {pending.name}")

    if args.dry_run:
        return

    # One agent run per log, strictly in sequence
    total_cost = 0.0
    count = len(to_compile)
    for position, log_path in enumerate(to_compile, start=1):
        print(f"\n[{position}/{count}] Compiling {log_path.name}...")
        total_cost += asyncio.run(compile_daily_log(log_path, state))
        print(" Done.")

    articles = list_wiki_articles()
    print(f"\nCompilation complete. Total cost: ${total_cost:.2f}")
    print(f"Knowledge base: {len(articles)} articles")


if __name__ == "__main__":
    main()
|
||||
33
scripts/config.py
Normal file
33
scripts/config.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
"""Path constants and configuration for the personal knowledge base."""
|
||||
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# ── Paths ──────────────────────────────────────────────────────────────
|
||||
ROOT_DIR = Path(__file__).resolve().parent.parent
|
||||
DAILY_DIR = ROOT_DIR / "daily"
|
||||
KNOWLEDGE_DIR = ROOT_DIR / "knowledge"
|
||||
CONCEPTS_DIR = KNOWLEDGE_DIR / "concepts"
|
||||
CONNECTIONS_DIR = KNOWLEDGE_DIR / "connections"
|
||||
QA_DIR = KNOWLEDGE_DIR / "qa"
|
||||
REPORTS_DIR = ROOT_DIR / "reports"
|
||||
SCRIPTS_DIR = ROOT_DIR / "scripts"
|
||||
HOOKS_DIR = ROOT_DIR / "hooks"
|
||||
AGENTS_FILE = ROOT_DIR / "AGENTS.md"
|
||||
|
||||
INDEX_FILE = KNOWLEDGE_DIR / "index.md"
|
||||
LOG_FILE = KNOWLEDGE_DIR / "log.md"
|
||||
STATE_FILE = SCRIPTS_DIR / "state.json"
|
||||
|
||||
# ── Timezone ───────────────────────────────────────────────────────────
|
||||
TIMEZONE = "America/Chicago"
|
||||
|
||||
|
||||
def now_iso() -> str:
    """Return the current local time as an ISO 8601 string with UTC offset.

    The zone is the system's local one (``astimezone()`` is called without an
    argument); the module-level ``TIMEZONE`` constant is not consulted here.
    The result is truncated to whole seconds.
    """
    utc_now = datetime.now(timezone.utc)
    local_now = utc_now.astimezone()
    return local_now.isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def today_iso() -> str:
    """Return today's date in the system's local zone as ``YYYY-MM-DD``."""
    local_now = datetime.now(timezone.utc).astimezone()
    return f"{local_now:%Y-%m-%d}"
|
||||
255
scripts/flush.py
Normal file
255
scripts/flush.py
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
"""
|
||||
Memory flush agent - extracts important knowledge from conversation context.
|
||||
|
||||
Spawned by session-end.py or pre-compact.py as a background process. Reads
|
||||
pre-extracted conversation context from a .md file, uses the Claude Agent SDK
|
||||
to decide what's worth saving, and appends the result to today's daily log.
|
||||
|
||||
Usage:
|
||||
uv run python flush.py <context_file.md> <session_id>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Recursion prevention: set this BEFORE any imports that might trigger Claude
|
||||
import os
|
||||
os.environ["CLAUDE_INVOKED_BY"] = "memory_flush"
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
DAILY_DIR = ROOT / "daily"
|
||||
SCRIPTS_DIR = ROOT / "scripts"
|
||||
STATE_FILE = SCRIPTS_DIR / "last-flush.json"
|
||||
LOG_FILE = SCRIPTS_DIR / "flush.log"
|
||||
|
||||
# Set up file-based logging so we can verify the background process ran.
|
||||
# The parent process sends stdout/stderr to DEVNULL (to avoid the inherited
|
||||
# file handle bug on Windows), so this is our only observability channel.
|
||||
logging.basicConfig(
|
||||
filename=str(LOG_FILE),
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)s %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
|
||||
|
||||
def load_flush_state() -> dict:
    """Load the dedup state left behind by the previous flush.

    Returns:
        The JSON object stored in ``STATE_FILE``, or an empty dict when the
        file is missing, unreadable, not valid UTF-8 / JSON, or holds
        something other than a JSON object.
    """
    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt file).
        return {}
    # Callers use ``.get``; any non-object payload is treated as "no state"
    return state if isinstance(state, dict) else {}
|
||||
|
||||
|
||||
def save_flush_state(state: dict) -> None:
    """Serialize ``state`` to JSON and overwrite ``STATE_FILE`` with it."""
    serialized = json.dumps(state)
    STATE_FILE.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def append_to_daily_log(content: str, section: str = "Session") -> None:
    """Append one timestamped entry to today's daily log.

    The log lives at ``DAILY_DIR/<YYYY-MM-DD>.md`` (local date). If it does
    not exist yet, the directory is created and the file is seeded with the
    standard header before the entry is appended.

    Args:
        content: Body text of the entry.
        section: Heading label; rendered as ``### <section> (HH:MM)``.
    """
    now_local = datetime.now(timezone.utc).astimezone()
    date_str = now_local.strftime("%Y-%m-%d")
    log_path = DAILY_DIR / f"{date_str}.md"

    if not log_path.exists():
        DAILY_DIR.mkdir(parents=True, exist_ok=True)
        header = f"# Daily Log: {date_str}\n\n## Sessions\n\n## Memory Maintenance\n\n"
        log_path.write_text(header, encoding="utf-8")

    with log_path.open("a", encoding="utf-8") as handle:
        handle.write(f"### {section} ({now_local:%H:%M})\n\n{content}\n\n")
|
||||
|
||||
|
||||
async def run_flush(context: str) -> str:
    """Ask the Claude agent which parts of a conversation are worth keeping.

    Args:
        context: Pre-extracted conversation text to review.

    Returns:
        The concatenated text of all assistant text blocks. The prompt asks
        the model to answer with exactly ``FLUSH_OK`` when nothing is worth
        saving. If the SDK call raises, the error is logged and a string of
        the form ``FLUSH_ERROR: <ExceptionType>: <message>`` is returned
        instead of any partial output.
    """
    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        TextBlock,
        query,
    )

    prompt = f"""Review the conversation context below and respond with a concise summary
of important items that should be preserved in the daily log.
Do NOT use any tools — just return plain text.

Format your response as a structured daily log entry with these sections:

**Context:** [One line about what the user was working on]

**Key Exchanges:**
- [Important Q&A or discussions]

**Decisions Made:**
- [Any decisions with rationale]

**Lessons Learned:**
- [Gotchas, patterns, or insights discovered]

**Action Items:**
- [Follow-ups or TODOs mentioned]

Skip anything that is:
- Routine tool calls or file reads
- Content that's trivial or obvious
- Trivial back-and-forth or clarification exchanges

Only include sections that have actual content. If nothing is worth saving,
respond with exactly: FLUSH_OK

## Conversation Context

{context}"""

    collected = []

    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(ROOT),
                allowed_tools=[],
                max_turns=2,
            ),
        ):
            if not isinstance(message, AssistantMessage):
                continue  # other message kinds carry no text to keep
            collected.extend(
                part.text for part in message.content if isinstance(part, TextBlock)
            )
    except Exception as e:
        import traceback

        logging.error("Agent SDK error: %s\n%s", e, traceback.format_exc())
        return f"FLUSH_ERROR: {type(e).__name__}: {e}"

    return "".join(collected)
|
||||
|
||||
|
||||
COMPILE_AFTER_HOUR = 18 # 6 PM local time
|
||||
|
||||
|
||||
def maybe_trigger_compilation() -> None:
    """Spawn compile.py in the background once the daily compile hour has passed.

    Does nothing when the local hour is before ``COMPILE_AFTER_HOUR``, when
    today's log is already recorded in ``scripts/state.json`` with a hash
    matching the file on disk, or when compile.py is missing. Otherwise a
    detached ``uv run ... python compile.py`` process is started with its
    output appended to ``scripts/compile.log``. Spawn failures are logged,
    never raised.
    """
    import subprocess as _sp

    now = datetime.now(timezone.utc).astimezone()
    if now.hour < COMPILE_AFTER_HOUR:
        return

    # Skip when today's log was already compiled and has not changed since
    today_log = f"{now.strftime('%Y-%m-%d')}.md"
    compile_state_file = SCRIPTS_DIR / "state.json"
    if compile_state_file.exists():
        try:
            compile_state = json.loads(compile_state_file.read_text(encoding="utf-8"))
            ingested = compile_state.get("ingested", {})
            if today_log in ingested:
                from hashlib import sha256

                log_path = DAILY_DIR / today_log
                if log_path.exists():
                    # Same truncated-digest form the compile state is compared against
                    current_hash = sha256(log_path.read_bytes()).hexdigest()[:16]
                    if ingested[today_log].get("hash") == current_hash:
                        return  # log unchanged since last compile
        except (json.JSONDecodeError, OSError):
            # Unreadable state is treated as "not compiled yet": fall through
            pass

    compile_script = SCRIPTS_DIR / "compile.py"
    if not compile_script.exists():
        return

    logging.info("End-of-day compilation triggered (after %d:00)", COMPILE_AFTER_HOUR)

    cmd = ["uv", "run", "--directory", str(ROOT), "python", str(compile_script)]

    # Detach the child so it outlives this short-lived flush process
    kwargs: dict = {}
    if sys.platform == "win32":
        kwargs["creationflags"] = _sp.CREATE_NEW_PROCESS_GROUP | _sp.DETACHED_PROCESS
    else:
        kwargs["start_new_session"] = True

    try:
        # The child receives its own copy of the handle at spawn time, so the
        # parent's copy is closed on leaving the ``with`` instead of leaking.
        with open(str(SCRIPTS_DIR / "compile.log"), "a") as log_handle:
            _sp.Popen(cmd, stdout=log_handle, stderr=_sp.STDOUT, cwd=str(ROOT), **kwargs)
    except Exception as e:
        logging.error("Failed to spawn compile.py: %s", e)
|
||||
|
||||
|
||||
def main():
    """Flush one session's pre-extracted context into today's daily log.

    Command line: ``flush.py <context_file.md> <session_id>``.

    Steps: validate arguments, skip if the same session was flushed less than
    60 seconds ago, run the LLM extraction, append the outcome to the daily
    log, record the dedup state, delete the context file, and finally give
    the end-of-day compilation a chance to start. Progress is reported only
    through ``logging``. Exits with status 1 when arguments are missing.
    """
    if len(sys.argv) < 3:
        logging.error("Usage: %s <context_file.md> <session_id>", sys.argv[0])
        sys.exit(1)

    context_file = Path(sys.argv[1])
    session_id = sys.argv[2]

    logging.info("flush.py started for session %s, context: %s", session_id, context_file)

    if not context_file.exists():
        logging.error("Context file not found: %s", context_file)
        return

    # Deduplication: skip if same session was flushed within 60 seconds
    state = load_flush_state()
    if (
        state.get("session_id") == session_id
        and time.time() - state.get("timestamp", 0) < 60
    ):
        logging.info("Skipping duplicate flush for session %s", session_id)
        context_file.unlink(missing_ok=True)
        return

    # Read pre-extracted context
    context = context_file.read_text(encoding="utf-8").strip()
    if not context:
        logging.info("Context file is empty, skipping")
        context_file.unlink(missing_ok=True)
        return

    logging.info("Flushing session %s: %d chars", session_id, len(context))

    # Run the LLM extraction
    response = asyncio.run(run_flush(context))

    # Classify the outcome. Errors are checked first and by prefix because
    # run_flush() emits them as "FLUSH_ERROR: ..."; an exception message that
    # happens to contain "FLUSH_OK" must not be mistaken for "nothing to save".
    if response.startswith("FLUSH_ERROR"):
        logging.error("Result: %s", response)
        append_to_daily_log(response, "Memory Flush")
    elif "FLUSH_OK" in response:
        logging.info("Result: FLUSH_OK")
        append_to_daily_log(
            "FLUSH_OK - Nothing worth saving from this session", "Memory Flush"
        )
    elif not response.strip():
        # The model produced no text at all; do not write a blank log entry
        logging.warning("Result: empty response, nothing appended to daily log")
    else:
        logging.info("Result: saved to daily log (%d chars)", len(response))
        append_to_daily_log(response, "Session")

    # Update dedup state
    save_flush_state({"session_id": session_id, "timestamp": time.time()})

    # Clean up context file
    context_file.unlink(missing_ok=True)

    # End-of-day auto-compilation: if it's past the compile hour and today's
    # log hasn't been compiled yet, trigger compile.py in the background.
    maybe_trigger_compilation()

    logging.info("Flush complete for session %s", session_id)


if __name__ == "__main__":
    main()
|
||||
312
scripts/lint.py
Normal file
312
scripts/lint.py
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
"""
|
||||
Lint the knowledge base for structural and semantic health.
|
||||
|
||||
Runs 7 checks: broken links, orphan pages, orphan sources, stale articles,
|
||||
contradictions (LLM), missing backlinks, and sparse articles.
|
||||
|
||||
Usage:
|
||||
uv run python lint.py # all checks
|
||||
uv run python lint.py --structural-only # skip LLM checks (faster, cheaper)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso
|
||||
from utils import (
|
||||
count_inbound_links,
|
||||
extract_wikilinks,
|
||||
file_hash,
|
||||
get_article_word_count,
|
||||
list_raw_files,
|
||||
list_wiki_articles,
|
||||
load_state,
|
||||
read_all_wiki_content,
|
||||
save_state,
|
||||
wiki_article_exists,
|
||||
)
|
||||
|
||||
ROOT_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def check_broken_links() -> list[dict]:
    """Report every ``[[wikilink]]`` whose target article does not exist.

    Links beginning with ``daily/`` refer to raw logs and are never flagged.

    Returns:
        One ``error``-severity issue dict per broken link occurrence.
    """
    issues = []
    for article in list_wiki_articles():
        text = article.read_text(encoding="utf-8")
        source_file = str(article.relative_to(KNOWLEDGE_DIR))
        issues.extend(
            {
                "severity": "error",
                "check": "broken_link",
                "file": source_file,
                "detail": f"Broken link: [[{link}]] - target does not exist",
            }
            for link in extract_wikilinks(text)
            if not link.startswith("daily/") and not wiki_article_exists(link)
        )
    return issues
|
||||
|
||||
|
||||
def check_orphan_pages() -> list[dict]:
    """Report articles that no other article links to.

    Returns:
        One ``warning``-severity issue dict per article whose inbound link
        count (as computed by ``count_inbound_links``) is zero.
    """
    issues = []
    for article in list_wiki_articles():
        rel = article.relative_to(KNOWLEDGE_DIR)
        # Strip only the trailing ".md" extension. A plain
        # str.replace(".md", "") would also eat ".md" in the middle of a
        # name (e.g. "notes.md-draft.md") and yield a wrong link target.
        stem_path = rel.with_suffix("") if rel.suffix == ".md" else rel
        link_target = stem_path.as_posix()  # wikilinks always use "/"
        if count_inbound_links(link_target) == 0:
            issues.append({
                "severity": "warning",
                "check": "orphan_page",
                "file": str(rel),
                "detail": f"Orphan page: no other articles link to [[{link_target}]]",
            })
    return issues
|
||||
|
||||
|
||||
def check_orphan_sources() -> list[dict]:
    """Report daily logs that have no entry in the compiler state.

    Returns:
        One ``warning``-severity issue dict per raw log whose file name is
        absent from ``state["ingested"]``.
    """
    ingested = load_state().get("ingested", {})
    return [
        {
            "severity": "warning",
            "check": "orphan_source",
            "file": f"daily/{log_path.name}",
            "detail": f"Uncompiled daily log: {log_path.name} has not been ingested",
        }
        for log_path in list_raw_files()
        if log_path.name not in ingested
    ]
|
||||
|
||||
|
||||
def check_stale_articles() -> list[dict]:
    """Report compiled daily logs whose content changed after compilation.

    Only logs already present in ``state["ingested"]`` are examined; a log is
    stale when its current ``file_hash`` differs from the recorded one.

    Returns:
        One ``warning``-severity issue dict per stale log.
    """
    ingested = load_state().get("ingested", {})
    issues = []
    for log_path in list_raw_files():
        name = log_path.name
        if name not in ingested:
            continue  # never compiled: not this check's concern
        recorded = ingested[name].get("hash", "")
        if recorded == file_hash(log_path):
            continue
        issues.append({
            "severity": "warning",
            "check": "stale_article",
            "file": f"daily/{name}",
            "detail": f"Stale: {name} has changed since last compilation",
        })
    return issues
|
||||
|
||||
|
||||
def check_missing_backlinks() -> list[dict]:
    """Report asymmetric links: A links to B but B does not link back to A.

    Links beginning with ``daily/`` and links whose target file does not
    exist are ignored (the latter are the broken-link check's job). A
    backlink counts as present when the literal text ``[[<A>]]`` occurs in
    B's content.

    Returns:
        One ``suggestion``-severity issue dict (flagged ``auto_fixable``) per
        one-directional link occurrence.
    """
    issues = []
    # Target path -> file content (None when the file does not exist), so a
    # popular target is read from disk once rather than once per inbound link.
    target_cache = {}

    def read_target(path):
        """Return the cached content of ``path``, or None if it is missing."""
        if path not in target_cache:
            target_cache[path] = path.read_text(encoding="utf-8") if path.exists() else None
        return target_cache[path]

    for article in list_wiki_articles():
        content = article.read_text(encoding="utf-8")
        rel = article.relative_to(KNOWLEDGE_DIR)
        # Strip only the trailing ".md"; str.replace(".md", "") would also
        # remove ".md" occurring elsewhere in the path.
        stem_path = rel.with_suffix("") if rel.suffix == ".md" else rel
        source_link = stem_path.as_posix()
        backlink = f"[[{source_link}]]"

        for link in extract_wikilinks(content):
            if link.startswith("daily/"):
                continue
            target_content = read_target(KNOWLEDGE_DIR / f"{link}.md")
            if target_content is None or backlink in target_content:
                continue
            issues.append({
                "severity": "suggestion",
                "check": "missing_backlink",
                "file": str(rel),
                "detail": f"[[{source_link}]] links to [[{link}]] but not vice versa",
                "auto_fixable": True,
            })
    return issues
|
||||
|
||||
|
||||
def check_sparse_articles(min_words: int = 200) -> list[dict]:
    """Report articles shorter than ``min_words`` words.

    Args:
        min_words: Minimum recommended word count; articles whose
            ``get_article_word_count`` is strictly below it are reported.
            Defaults to 200.

    Returns:
        One ``suggestion``-severity issue dict per sparse article.
    """
    issues = []
    for article in list_wiki_articles():
        word_count = get_article_word_count(article)
        if word_count >= min_words:
            continue
        issues.append({
            "severity": "suggestion",
            "check": "sparse_article",
            "file": str(article.relative_to(KNOWLEDGE_DIR)),
            "detail": f"Sparse article: {word_count} words (minimum recommended: {min_words})",
        })
    return issues
|
||||
|
||||
|
||||
async def check_contradictions() -> list[dict]:
    """Use the LLM to detect contradictions across articles.

    Sends the whole wiki (``read_all_wiki_content``) to the agent with
    instructions to answer with ``CONTRADICTION:`` / ``INCONSISTENCY:`` lines
    or the single marker ``NO_ISSUES``.

    Returns:
        One ``warning`` issue per flagged line. If the SDK call raises, a
        single ``error`` issue describing the failure. If the response holds
        neither a flagged line nor the ``NO_ISSUES`` marker (including an
        empty response), a single ``warning`` saying the result could not be
        interpreted, so a failed check is never reported as a clean pass.
    """
    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        TextBlock,
        query,
    )

    wiki_content = read_all_wiki_content()

    prompt = f"""Review this knowledge base for contradictions, inconsistencies, or
conflicting claims across articles.

## Knowledge Base

{wiki_content}

## Instructions

Look for:
- Direct contradictions (article A says X, article B says not-X)
- Inconsistent recommendations (different articles recommend conflicting approaches)
- Outdated information that conflicts with newer entries

For each issue found, output EXACTLY one line in this format:
CONTRADICTION: [file1] vs [file2] - description of the conflict
INCONSISTENCY: [file] - description of the inconsistency

If no issues found, output exactly: NO_ISSUES

Do NOT output anything else - no preamble, no explanation, just the formatted lines."""

    response = ""
    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(ROOT_DIR),
                allowed_tools=[],
                max_turns=2,
            ),
        ):
            if isinstance(message, AssistantMessage):
                for block in message.content:
                    if isinstance(block, TextBlock):
                        response += block.text
    except Exception as e:
        return [{"severity": "error", "check": "contradiction", "file": "(system)", "detail": f"LLM check failed: {e}"}]

    # Always parse flagged lines: a stray "NO_ISSUES" elsewhere in the
    # response must not cause real findings to be thrown away.
    issues = []
    for line in response.strip().split("\n"):
        line = line.strip()
        if line.startswith(("CONTRADICTION:", "INCONSISTENCY:")):
            issues.append({
                "severity": "warning",
                "check": "contradiction",
                "file": "(cross-article)",
                "detail": line,
            })

    if not issues and "NO_ISSUES" not in response:
        # Neither findings nor the explicit all-clear marker: the check did
        # not produce a usable answer, so say so instead of passing silently.
        issues.append({
            "severity": "warning",
            "check": "contradiction",
            "file": "(system)",
            "detail": "LLM check returned no parseable findings and no NO_ISSUES marker",
        })

    return issues
|
||||
|
||||
|
||||
def generate_report(all_issues: list[dict]) -> str:
    """Render the collected lint issues as a markdown report.

    The report has a dated title, a totals summary, then one section per
    severity (Errors, Warnings, Suggestions - in that order, empty ones
    omitted). When there are no issues at all, a single "healthy" line is
    emitted instead.

    Args:
        all_issues: Issue dicts with at least ``severity``, ``file`` and
            ``detail`` keys; ``auto_fixable`` is optional.

    Returns:
        The report text, lines joined with ``"\\n"``.
    """
    sections = (
        ("Errors", "error", "x"),
        ("Warnings", "warning", "!"),
        ("Suggestions", "suggestion", "?"),
    )
    grouped = {
        key: [issue for issue in all_issues if issue["severity"] == key]
        for _, key, _ in sections
    }

    out = [
        f"# Lint Report - {today_iso()}",
        "",
        f"**Total issues:** {len(all_issues)}",
        f"- Errors: {len(grouped['error'])}",
        f"- Warnings: {len(grouped['warning'])}",
        f"- Suggestions: {len(grouped['suggestion'])}",
        "",
    ]

    for title, key, marker in sections:
        bucket = grouped[key]
        if not bucket:
            continue
        out += [f"## {title}", ""]
        for issue in bucket:
            suffix = " (auto-fixable)" if issue.get("auto_fixable") else ""
            out.append(f"- **[{marker}]** `{issue['file']}` - {issue['detail']}{suffix}")
        out.append("")

    if not all_issues:
        out += ["All checks passed. Knowledge base is healthy.", ""]

    return "\n".join(out)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the lint checks, write the markdown report and summarize.

    Structural checks always run; the LLM contradiction check runs unless
    ``--structural-only`` is given. The report is written to
    ``REPORTS_DIR/lint-<today>.md`` and ``state["last_lint"]`` is updated.

    Returns:
        Process exit code: ``1`` when at least one issue has severity
        ``"error"``, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="Lint the knowledge base")
    parser.add_argument(
        "--structural-only",
        action="store_true",
        help="Skip LLM-based checks (contradictions) - faster and free",
    )
    args = parser.parse_args()

    print("Running knowledge base lint checks...")
    all_issues: list[dict] = []

    # Structural checks (free, instant)
    checks = [
        ("Broken links", check_broken_links),
        ("Orphan pages", check_orphan_pages),
        ("Orphan sources", check_orphan_sources),
        ("Stale articles", check_stale_articles),
        ("Missing backlinks", check_missing_backlinks),
        ("Sparse articles", check_sparse_articles),
    ]

    for name, check_fn in checks:
        print(f" Checking: {name}...")
        issues = check_fn()
        all_issues.extend(issues)
        print(f" Found {len(issues)} issue(s)")

    # LLM check (costs money)
    if not args.structural_only:
        print(" Checking: Contradictions (LLM)...")
        issues = asyncio.run(check_contradictions())
        all_issues.extend(issues)
        print(f" Found {len(issues)} issue(s)")
    else:
        print(" Skipping: Contradictions (--structural-only)")

    # Generate and save report
    report = generate_report(all_issues)
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    report_path = REPORTS_DIR / f"lint-{today_iso()}.md"
    report_path.write_text(report, encoding="utf-8")
    print(f"\nReport saved to: {report_path}")

    # Update state
    state = load_state()
    state["last_lint"] = now_iso()
    save_state(state)

    # Summary
    errors = sum(1 for i in all_issues if i["severity"] == "error")
    warnings = sum(1 for i in all_issues if i["severity"] == "warning")
    suggestions = sum(1 for i in all_issues if i["severity"] == "suggestion")
    print(f"\nResults: {errors} errors, {warnings} warnings, {suggestions} suggestions")

    if errors > 0:
        print("\nErrors found - knowledge base needs attention!")
        return 1
    return 0


if __name__ == "__main__":
    # The builtin-looking exit() is injected by the ``site`` module for
    # interactive use and may be absent (e.g. ``python -S``); raising
    # SystemExit is the always-available equivalent.
    raise SystemExit(main())
|
||||
138
scripts/query.py
Normal file
138
scripts/query.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""
|
||||
Query the knowledge base using index-guided retrieval (no RAG).
|
||||
|
||||
The LLM reads the index, picks relevant articles, and synthesizes an answer.
|
||||
No vector database, no embeddings, no chunking - just structured markdown
|
||||
and an index the LLM can reason over.
|
||||
|
||||
Usage:
|
||||
uv run python query.py "How should I handle auth redirects?"
|
||||
uv run python query.py "What patterns do I use for API design?" --file-back
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from config import KNOWLEDGE_DIR, QA_DIR, now_iso
|
||||
from utils import load_state, read_all_wiki_content, save_state
|
||||
|
||||
ROOT_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
async def run_query(question: str, file_back: bool = False) -> str:
    """Answer a question from the knowledge base, optionally filing the answer.

    The full wiki content is embedded in the prompt. The agent always gets
    read-only tools; with ``file_back`` it additionally gets write tools and
    instructions to create a Q&A article and update the index and log.

    Args:
        question: The user's question.
        file_back: When true, ask the agent to persist the answer.

    Returns:
        The concatenated assistant text, or ``"Error querying knowledge
        base: ..."`` if the SDK call raised. In both cases the query counter
        and accumulated cost in the persisted state are updated.
    """
    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        ResultMessage,
        TextBlock,
        query,
    )

    wiki_content = read_all_wiki_content()

    read_tools = ["Read", "Glob", "Grep"]
    tools = read_tools + ["Write", "Edit"] if file_back else read_tools

    if file_back:
        timestamp = now_iso()
        file_back_instructions = f"""

## File Back Instructions

After answering, do the following:
1. Create a Q&A article at {QA_DIR}/ with the filename being a slugified version
of the question (e.g., knowledge/qa/how-to-handle-auth-redirects.md)
2. Use the Q&A article format from the schema (frontmatter with title, question,
consulted articles, filed date)
3. Update {KNOWLEDGE_DIR / 'index.md'} with a new row for this Q&A article
4. Append to {KNOWLEDGE_DIR / 'log.md'}:
## [{timestamp}] query (filed) | question summary
- Question: {question}
- Consulted: [[list of articles read]]
- Filed to: [[qa/article-name]]
"""
    else:
        file_back_instructions = ""

    prompt = f"""You are a knowledge base query engine. Answer the user's question by
consulting the knowledge base below.

## How to Answer

1. Read the INDEX section first - it lists every article with a one-line summary
2. Identify 3-10 articles that are relevant to the question
3. Read those articles carefully (they're included below)
4. Synthesize a clear, thorough answer
5. Cite your sources using [[wikilinks]] (e.g., [[concepts/supabase-auth]])
6. If the knowledge base doesn't contain relevant information, say so honestly

## Knowledge Base

{wiki_content}

## Question

{question}
{file_back_instructions}"""

    pieces = []
    cost = 0.0
    failure = None

    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(ROOT_DIR),
                system_prompt={"type": "preset", "preset": "claude_code"},
                allowed_tools=tools,
                permission_mode="acceptEdits",
                max_turns=15,
            ),
        ):
            if isinstance(message, AssistantMessage):
                pieces.extend(
                    part.text for part in message.content if isinstance(part, TextBlock)
                )
            elif isinstance(message, ResultMessage):
                cost = message.total_cost_usd or 0.0
    except Exception as e:
        # Any partial answer is discarded in favour of the error text
        failure = f"Error querying knowledge base: {e}"

    answer = failure if failure is not None else "".join(pieces)

    # Bookkeeping happens whether or not the query succeeded
    state = load_state()
    state["query_count"] = state.get("query_count", 0) + 1
    state["total_cost"] = state.get("total_cost", 0.0) + cost
    save_state(state)

    return answer
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for querying the knowledge base.

    Parses a positional ``question`` and an optional ``--file-back`` flag,
    echoes both, runs ``run_query`` to completion on a fresh event loop and
    prints the returned answer. When ``--file-back`` is set, a footer with the
    number of ``*.md`` files currently in ``QA_DIR`` is printed as well.
    """
    cli = argparse.ArgumentParser(description="Query the personal knowledge base")
    cli.add_argument("question", help="The question to ask")
    cli.add_argument(
        "--file-back",
        action="store_true",
        help="File the answer back into the knowledge base as a Q&A article",
    )
    opts = cli.parse_args()

    separator = "-" * 60
    file_back_label = "yes" if opts.file_back else "no"
    print(f"Question: {opts.question}")
    print(f"File back: {file_back_label}")
    print(separator)

    print(asyncio.run(run_query(opts.question, file_back=opts.file_back)))

    # Nothing more to report unless the answer was meant to be filed.
    if not opts.file_back:
        return

    print("\n" + separator)
    # A missing Q&A directory simply means no articles have been filed yet.
    qa_count = sum(1 for _ in QA_DIR.glob("*.md")) if QA_DIR.exists() else 0
    print(f"Answer filed to knowledge/qa/ ({qa_count} Q&A articles total)")
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
133
scripts/utils.py
Normal file
133
scripts/utils.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""Shared utilities for the personal knowledge base."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from config import (
|
||||
CONCEPTS_DIR,
|
||||
CONNECTIONS_DIR,
|
||||
DAILY_DIR,
|
||||
INDEX_FILE,
|
||||
KNOWLEDGE_DIR,
|
||||
LOG_FILE,
|
||||
QA_DIR,
|
||||
STATE_FILE,
|
||||
)
|
||||
|
||||
|
||||
# ── State management ──────────────────────────────────────────────────
|
||||
|
||||
def load_state() -> dict:
    """Return the persisted state dict, or a fresh default if none is saved.

    When ``STATE_FILE`` does not exist, a new dict with empty ``ingested``,
    zero ``query_count`` and ``total_cost``, and ``last_lint`` set to ``None``
    is returned. Otherwise the file is read as UTF-8 and parsed as JSON.
    """
    if not STATE_FILE.exists():
        return {"ingested": {}, "query_count": 0, "last_lint": None, "total_cost": 0.0}
    raw_state = STATE_FILE.read_text(encoding="utf-8")
    return json.loads(raw_state)
|
||||
|
||||
|
||||
def save_state(state: dict) -> None:
    """Serialize *state* as 2-space-indented JSON and overwrite ``STATE_FILE``."""
    serialized = json.dumps(state, indent=2)
    STATE_FILE.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
# ── File hashing ──────────────────────────────────────────────────────
|
||||
|
||||
def file_hash(path: Path) -> str:
    """Return a short fingerprint of a file's bytes.

    The fingerprint is the first 16 hexadecimal characters of the SHA-256
    digest of the file's full contents.
    """
    hasher = hashlib.sha256()
    hasher.update(path.read_bytes())
    full_digest = hasher.hexdigest()
    return full_digest[:16]
|
||||
|
||||
|
||||
# ── Slug / naming ─────────────────────────────────────────────────────
|
||||
|
||||
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated slug.

    The text is lowercased and trimmed, then rewritten in three passes, and
    finally stripped of leading/trailing hyphens. The result may be an empty
    string if nothing survives the clean-up.
    """
    slug = text.lower().strip()
    rewrite_passes = (
        (r"[^\w\s-]", ""),  # drop everything except word chars, whitespace, hyphens
        (r"[\s_]+", "-"),   # runs of whitespace/underscores become one hyphen
        (r"-+", "-"),       # collapse repeated hyphens
    )
    for pattern, replacement in rewrite_passes:
        slug = re.sub(pattern, replacement, slug)
    return slug.strip("-")
|
||||
|
||||
|
||||
# ── Wikilink helpers ──────────────────────────────────────────────────
|
||||
|
||||
def extract_wikilinks(content: str) -> list[str]:
    """Return the inner text of every ``[[...]]`` link in *content*, in order.

    Duplicates are kept, and the text between the brackets is returned
    verbatim (anything up to the first ``]``).
    """
    link_pattern = re.compile(r"\[\[([^\]]+)\]\]")
    return [found.group(1) for found in link_pattern.finditer(content)]
|
||||
|
||||
|
||||
def wiki_article_exists(link: str) -> bool:
    """Report whether ``<link>.md`` exists under ``KNOWLEDGE_DIR``."""
    return KNOWLEDGE_DIR.joinpath(f"{link}.md").exists()
|
||||
|
||||
|
||||
# ── Wiki content helpers ──────────────────────────────────────────────
|
||||
|
||||
def read_wiki_index() -> str:
    """Return the text of ``INDEX_FILE``, or an empty index table if it is absent.

    The fallback is a heading followed by a markdown table header with the
    columns Article, Summary, Compiled From and Updated, and no rows.
    """
    if not INDEX_FILE.exists():
        return (
            "# Knowledge Base Index\n\n"
            "| Article | Summary | Compiled From | Updated |\n"
            "|---------|---------|---------------|---------|"
        )
    return INDEX_FILE.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def read_all_wiki_content() -> str:
    """Concatenate the index and every wiki article into one context string.

    The first section is the index under a ``## INDEX`` heading. It is
    followed by each ``*.md`` file from the concepts, connections and Q&A
    directories (in that order, files sorted within each directory), each
    under a ``## <path relative to KNOWLEDGE_DIR>`` heading. Directories that
    do not exist are skipped. Sections are separated by a ``---`` rule.
    """
    sections = [f"## INDEX\n\n{read_wiki_index()}"]

    existing_dirs = (
        directory
        for directory in (CONCEPTS_DIR, CONNECTIONS_DIR, QA_DIR)
        if directory.exists()
    )
    for directory in existing_dirs:
        for article in sorted(directory.glob("*.md")):
            heading = article.relative_to(KNOWLEDGE_DIR)
            body = article.read_text(encoding="utf-8")
            sections.append(f"## {heading}\n\n{body}")

    return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
def list_wiki_articles() -> list[Path]:
    """Return every ``*.md`` file in the concepts, connections and Q&A directories.

    Directories are visited in that order and missing ones are skipped; files
    are sorted within each directory.
    """
    return [
        article
        for directory in (CONCEPTS_DIR, CONNECTIONS_DIR, QA_DIR)
        if directory.exists()
        for article in sorted(directory.glob("*.md"))
    ]
|
||||
|
||||
|
||||
def list_raw_files() -> list[Path]:
    """Return the sorted ``*.md`` files in ``DAILY_DIR``, or ``[]`` if it is missing."""
    return sorted(DAILY_DIR.glob("*.md")) if DAILY_DIR.exists() else []
|
||||
|
||||
|
||||
# ── Index helpers ─────────────────────────────────────────────────────
|
||||
|
||||
def count_inbound_links(target: str, exclude_file: Path | None = None) -> int:
    """Count the wiki articles whose text contains ``[[target]]``.

    Each article counts at most once, however many times it links to the
    target. The article equal to *exclude_file* (if given) is skipped without
    being read.
    """
    needle = f"[[{target}]]"
    return sum(
        1
        for article in list_wiki_articles()
        if article != exclude_file and needle in article.read_text(encoding="utf-8")
    )
|
||||
|
||||
|
||||
def get_article_word_count(path: Path) -> int:
    """Count whitespace-separated words in an article, excluding YAML frontmatter.

    Frontmatter is recognised only when the first line is exactly ``---``
    (trailing whitespace allowed) and a later line is again exactly ``---``.
    Matching whole lines rather than the raw substring keeps a ``---`` that
    appears inside a frontmatter value from being mistaken for the closing
    fence, which would count the remaining metadata as body text.

    Args:
        path: The article file to read (decoded as UTF-8).

    Returns:
        The number of words after the frontmatter. If the file has no
        frontmatter, or the opening fence is never closed, the whole file
        is counted.
    """
    content = path.read_text(encoding="utf-8")
    # keepends=True so the body can be re-joined without altering its text.
    lines = content.splitlines(keepends=True)
    if lines and lines[0].rstrip() == "---":
        for index, line in enumerate(lines[1:], start=1):
            if line.rstrip() == "---":
                content = "".join(lines[index + 1:])
                break
    return len(content.split())
|
||||
|
||||
|
||||
def build_index_entry(rel_path: str, summary: str, sources: str, updated: str) -> str:
    """Build a single markdown table row for the knowledge base index.

    Args:
        rel_path: Article path, with or without a trailing ``.md``.
        summary: Text for the Summary column.
        sources: Text for the Compiled From column.
        updated: Text for the Updated column.

    Returns:
        A row of the form ``| [[link]] | summary | sources | updated |``,
        where ``link`` is *rel_path* without its ``.md`` extension.
    """
    # Strip only the trailing extension; replacing every ".md" occurrence
    # would corrupt names that contain it elsewhere (e.g. "notes.mdx-guide.md").
    link = rel_path.removesuffix(".md")
    return f"| [[{link}]] | {summary} | {sources} | {updated} |"
|
||||
Loading…
Add table
Add a link
Reference in a new issue