Claude Code Memory Compiler
This commit is contained in:
commit
f83d38d787
15 changed files with 2819 additions and 0 deletions
312
scripts/lint.py
Normal file
312
scripts/lint.py
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
"""
|
||||
Lint the knowledge base for structural and semantic health.
|
||||
|
||||
Runs 7 checks: broken links, orphan pages, orphan sources, stale articles,
|
||||
contradictions (LLM), missing backlinks, and sparse articles.
|
||||
|
||||
Usage:
|
||||
uv run python lint.py # all checks
|
||||
uv run python lint.py --structural-only # skip LLM checks (faster, cheaper)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso
|
||||
from utils import (
|
||||
count_inbound_links,
|
||||
extract_wikilinks,
|
||||
file_hash,
|
||||
get_article_word_count,
|
||||
list_raw_files,
|
||||
list_wiki_articles,
|
||||
load_state,
|
||||
read_all_wiki_content,
|
||||
save_state,
|
||||
wiki_article_exists,
|
||||
)
|
||||
|
||||
ROOT_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def check_broken_links() -> list[dict]:
|
||||
"""Check for [[wikilinks]] that point to non-existent articles."""
|
||||
issues = []
|
||||
for article in list_wiki_articles():
|
||||
content = article.read_text(encoding="utf-8")
|
||||
rel = article.relative_to(KNOWLEDGE_DIR)
|
||||
for link in extract_wikilinks(content):
|
||||
if link.startswith("daily/"):
|
||||
continue # daily log references are valid
|
||||
if not wiki_article_exists(link):
|
||||
issues.append({
|
||||
"severity": "error",
|
||||
"check": "broken_link",
|
||||
"file": str(rel),
|
||||
"detail": f"Broken link: [[{link}]] - target does not exist",
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
def check_orphan_pages() -> list[dict]:
|
||||
"""Check for articles with zero inbound links."""
|
||||
issues = []
|
||||
for article in list_wiki_articles():
|
||||
rel = article.relative_to(KNOWLEDGE_DIR)
|
||||
link_target = str(rel).replace(".md", "").replace("\\", "/")
|
||||
inbound = count_inbound_links(link_target)
|
||||
if inbound == 0:
|
||||
issues.append({
|
||||
"severity": "warning",
|
||||
"check": "orphan_page",
|
||||
"file": str(rel),
|
||||
"detail": f"Orphan page: no other articles link to [[{link_target}]]",
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
def check_orphan_sources() -> list[dict]:
|
||||
"""Check for daily logs that haven't been compiled yet."""
|
||||
state = load_state()
|
||||
ingested = state.get("ingested", {})
|
||||
issues = []
|
||||
for log_path in list_raw_files():
|
||||
if log_path.name not in ingested:
|
||||
issues.append({
|
||||
"severity": "warning",
|
||||
"check": "orphan_source",
|
||||
"file": f"daily/{log_path.name}",
|
||||
"detail": f"Uncompiled daily log: {log_path.name} has not been ingested",
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
def check_stale_articles() -> list[dict]:
|
||||
"""Check if source daily logs have changed since compilation."""
|
||||
state = load_state()
|
||||
ingested = state.get("ingested", {})
|
||||
issues = []
|
||||
for log_path in list_raw_files():
|
||||
rel = log_path.name
|
||||
if rel in ingested:
|
||||
stored_hash = ingested[rel].get("hash", "")
|
||||
current_hash = file_hash(log_path)
|
||||
if stored_hash != current_hash:
|
||||
issues.append({
|
||||
"severity": "warning",
|
||||
"check": "stale_article",
|
||||
"file": f"daily/{rel}",
|
||||
"detail": f"Stale: {rel} has changed since last compilation",
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
def check_missing_backlinks() -> list[dict]:
|
||||
"""Check for asymmetric links: A links to B but B doesn't link to A."""
|
||||
issues = []
|
||||
for article in list_wiki_articles():
|
||||
content = article.read_text(encoding="utf-8")
|
||||
rel = article.relative_to(KNOWLEDGE_DIR)
|
||||
source_link = str(rel).replace(".md", "").replace("\\", "/")
|
||||
|
||||
for link in extract_wikilinks(content):
|
||||
if link.startswith("daily/"):
|
||||
continue
|
||||
target_path = KNOWLEDGE_DIR / f"{link}.md"
|
||||
if target_path.exists():
|
||||
target_content = target_path.read_text(encoding="utf-8")
|
||||
if f"[[{source_link}]]" not in target_content:
|
||||
issues.append({
|
||||
"severity": "suggestion",
|
||||
"check": "missing_backlink",
|
||||
"file": str(rel),
|
||||
"detail": f"[[{source_link}]] links to [[{link}]] but not vice versa",
|
||||
"auto_fixable": True,
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
def check_sparse_articles() -> list[dict]:
|
||||
"""Check for articles with fewer than 200 words."""
|
||||
issues = []
|
||||
for article in list_wiki_articles():
|
||||
word_count = get_article_word_count(article)
|
||||
if word_count < 200:
|
||||
rel = article.relative_to(KNOWLEDGE_DIR)
|
||||
issues.append({
|
||||
"severity": "suggestion",
|
||||
"check": "sparse_article",
|
||||
"file": str(rel),
|
||||
"detail": f"Sparse article: {word_count} words (minimum recommended: 200)",
|
||||
})
|
||||
return issues
|
||||
|
||||
|
||||
async def check_contradictions() -> list[dict]:
|
||||
"""Use LLM to detect contradictions across articles."""
|
||||
from claude_agent_sdk import (
|
||||
AssistantMessage,
|
||||
ClaudeAgentOptions,
|
||||
ResultMessage,
|
||||
TextBlock,
|
||||
query,
|
||||
)
|
||||
|
||||
wiki_content = read_all_wiki_content()
|
||||
|
||||
prompt = f"""Review this knowledge base for contradictions, inconsistencies, or
|
||||
conflicting claims across articles.
|
||||
|
||||
## Knowledge Base
|
||||
|
||||
{wiki_content}
|
||||
|
||||
## Instructions
|
||||
|
||||
Look for:
|
||||
- Direct contradictions (article A says X, article B says not-X)
|
||||
- Inconsistent recommendations (different articles recommend conflicting approaches)
|
||||
- Outdated information that conflicts with newer entries
|
||||
|
||||
For each issue found, output EXACTLY one line in this format:
|
||||
CONTRADICTION: [file1] vs [file2] - description of the conflict
|
||||
INCONSISTENCY: [file] - description of the inconsistency
|
||||
|
||||
If no issues found, output exactly: NO_ISSUES
|
||||
|
||||
Do NOT output anything else - no preamble, no explanation, just the formatted lines."""
|
||||
|
||||
response = ""
|
||||
try:
|
||||
async for message in query(
|
||||
prompt=prompt,
|
||||
options=ClaudeAgentOptions(
|
||||
cwd=str(ROOT_DIR),
|
||||
allowed_tools=[],
|
||||
max_turns=2,
|
||||
),
|
||||
):
|
||||
if isinstance(message, AssistantMessage):
|
||||
for block in message.content:
|
||||
if isinstance(block, TextBlock):
|
||||
response += block.text
|
||||
except Exception as e:
|
||||
return [{"severity": "error", "check": "contradiction", "file": "(system)", "detail": f"LLM check failed: {e}"}]
|
||||
|
||||
issues = []
|
||||
if "NO_ISSUES" not in response:
|
||||
for line in response.strip().split("\n"):
|
||||
line = line.strip()
|
||||
if line.startswith("CONTRADICTION:") or line.startswith("INCONSISTENCY:"):
|
||||
issues.append({
|
||||
"severity": "warning",
|
||||
"check": "contradiction",
|
||||
"file": "(cross-article)",
|
||||
"detail": line,
|
||||
})
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
def generate_report(all_issues: list[dict]) -> str:
|
||||
"""Generate a markdown lint report."""
|
||||
errors = [i for i in all_issues if i["severity"] == "error"]
|
||||
warnings = [i for i in all_issues if i["severity"] == "warning"]
|
||||
suggestions = [i for i in all_issues if i["severity"] == "suggestion"]
|
||||
|
||||
lines = [
|
||||
f"# Lint Report - {today_iso()}",
|
||||
"",
|
||||
f"**Total issues:** {len(all_issues)}",
|
||||
f"- Errors: {len(errors)}",
|
||||
f"- Warnings: {len(warnings)}",
|
||||
f"- Suggestions: {len(suggestions)}",
|
||||
"",
|
||||
]
|
||||
|
||||
for severity, issues, marker in [
|
||||
("Errors", errors, "x"),
|
||||
("Warnings", warnings, "!"),
|
||||
("Suggestions", suggestions, "?"),
|
||||
]:
|
||||
if issues:
|
||||
lines.append(f"## {severity}")
|
||||
lines.append("")
|
||||
for issue in issues:
|
||||
fixable = " (auto-fixable)" if issue.get("auto_fixable") else ""
|
||||
lines.append(f"- **[{marker}]** `{issue['file']}` - {issue['detail']}{fixable}")
|
||||
lines.append("")
|
||||
|
||||
if not all_issues:
|
||||
lines.append("All checks passed. Knowledge base is healthy.")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Lint the knowledge base")
|
||||
parser.add_argument(
|
||||
"--structural-only",
|
||||
action="store_true",
|
||||
help="Skip LLM-based checks (contradictions) - faster and free",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Running knowledge base lint checks...")
|
||||
all_issues: list[dict] = []
|
||||
|
||||
# Structural checks (free, instant)
|
||||
checks = [
|
||||
("Broken links", check_broken_links),
|
||||
("Orphan pages", check_orphan_pages),
|
||||
("Orphan sources", check_orphan_sources),
|
||||
("Stale articles", check_stale_articles),
|
||||
("Missing backlinks", check_missing_backlinks),
|
||||
("Sparse articles", check_sparse_articles),
|
||||
]
|
||||
|
||||
for name, check_fn in checks:
|
||||
print(f" Checking: {name}...")
|
||||
issues = check_fn()
|
||||
all_issues.extend(issues)
|
||||
print(f" Found {len(issues)} issue(s)")
|
||||
|
||||
# LLM check (costs money)
|
||||
if not args.structural_only:
|
||||
print(" Checking: Contradictions (LLM)...")
|
||||
issues = asyncio.run(check_contradictions())
|
||||
all_issues.extend(issues)
|
||||
print(f" Found {len(issues)} issue(s)")
|
||||
else:
|
||||
print(" Skipping: Contradictions (--structural-only)")
|
||||
|
||||
# Generate and save report
|
||||
report = generate_report(all_issues)
|
||||
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
report_path = REPORTS_DIR / f"lint-{today_iso()}.md"
|
||||
report_path.write_text(report, encoding="utf-8")
|
||||
print(f"\nReport saved to: {report_path}")
|
||||
|
||||
# Update state
|
||||
state = load_state()
|
||||
state["last_lint"] = now_iso()
|
||||
save_state(state)
|
||||
|
||||
# Summary
|
||||
errors = sum(1 for i in all_issues if i["severity"] == "error")
|
||||
warnings = sum(1 for i in all_issues if i["severity"] == "warning")
|
||||
suggestions = sum(1 for i in all_issues if i["severity"] == "suggestion")
|
||||
print(f"\nResults: {errors} errors, {warnings} warnings, {suggestions} suggestions")
|
||||
|
||||
if errors > 0:
|
||||
print("\nErrors found - knowledge base needs attention!")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue