memoria/scripts/lint.py
agent-admin b57ce15fff fix: prevent global hooks from firing inside SDK-spawned Claude subprocesses
Discovered during Memoria Phase 4 first compile run: when compile.py
invokes claude_agent_sdk.query(), the spawned `claude` subprocess
inherits the global ~/.claude/settings.json hook config. Its
SessionEnd hook then fires when the subprocess wraps up, triggering
flush.py against today's daily log — polluting the log with compile
metadata and creating a soft recursion (every compile call also
generates a flush call).

flush.py already had this guard (CLAUDE_INVOKED_BY=memory_flush set
at module top before any SDK import). compile.py / query.py / lint.py
did not.

Add the same guard to the other three SDK call sites with
script-specific sentinel values:
  - compile.py → memoria_compile
  - query.py   → memoria_query
  - lint.py    → memoria_lint

The sentinel value doesn't matter — both session-end.py and
pre-compact.py check `if os.environ.get("CLAUDE_INVOKED_BY"): exit(0)`,
so any non-empty value short-circuits. Using distinct sentinels makes
diagnostics clearer if a hook trace ever shows it.

Verified: imports clean, all 29 acceptance tests still pass.
2026-04-24 18:29:29 -04:00

341 lines
12 KiB
Python

"""
Lint the knowledge base for structural and semantic health.
Runs 7 checks: broken links, orphan pages, orphan sources, stale articles,
contradictions (LLM), missing backlinks, and sparse articles.
Usage:
uv run python lint.py # all checks
uv run python lint.py --structural-only # skip LLM checks (faster, cheaper)
"""
from __future__ import annotations
import os
# Recursion guard — set before any SDK import so global SessionEnd/PreCompact
# hooks see CLAUDE_INVOKED_BY in the nested Claude subprocess's env and exit
# cleanly. See compile.py for full rationale.
# NOTE: this assignment runs at import time and unconditionally overwrites any
# value already present in the environment; keep it above the imports below.
os.environ["CLAUDE_INVOKED_BY"] = "memoria_lint"
import argparse
import asyncio
from pathlib import Path
from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso
# Model passed to the LLM contradiction check. Defaults to "sonnet" (kept for
# reasoning quality); set the MEMORIA_LINT_MODEL environment variable to
# override it, e.g. with a cheaper model. The value is read once, at import.
LINT_MODEL = os.environ.get("MEMORIA_LINT_MODEL", "sonnet")
from utils import (
count_inbound_links,
extract_wikilinks,
file_hash,
get_article_word_count,
list_raw_files,
list_wiki_articles,
load_state,
read_all_wiki_content,
save_state,
wiki_article_exists,
)
# Directory two levels above this file's resolved path (the parent of the
# directory containing lint.py). Used as the working directory for the SDK
# query in check_contradictions().
ROOT_DIR = Path(__file__).resolve().parent.parent
def check_broken_links() -> list[dict]:
    """Report every [[wikilink]] whose target article cannot be found.

    Links that start with ``daily/`` are treated as valid daily-log
    references and are never reported. Each finding is an ``error``-severity
    dict carrying the linking article's path relative to KNOWLEDGE_DIR.
    """
    findings: list[dict] = []
    for page in list_wiki_articles():
        text = page.read_text(encoding="utf-8")
        shown_path = str(page.relative_to(KNOWLEDGE_DIR))
        # Keep only links that are neither daily-log references nor resolvable.
        dangling = [
            target
            for target in extract_wikilinks(text)
            if not target.startswith("daily/") and not wiki_article_exists(target)
        ]
        findings.extend(
            {
                "severity": "error",
                "check": "broken_link",
                "file": shown_path,
                "detail": f"Broken link: [[{target}]] - target does not exist",
            }
            for target in dangling
        )
    return findings
def check_orphan_pages() -> list[dict]:
    """Check for articles with zero inbound links.

    An article's link target is its path relative to KNOWLEDGE_DIR, written
    with forward slashes and without the trailing ``.md`` extension. That
    target is passed to ``count_inbound_links``; a count of zero produces a
    ``warning``-severity issue.

    Returns:
        A list of issue dicts (``severity``, ``check``, ``file``, ``detail``).
    """
    issues = []
    for article in list_wiki_articles():
        rel = article.relative_to(KNOWLEDGE_DIR)
        link_target = str(rel).replace("\\", "/")
        # Strip only the trailing extension. A blanket replace(".md", "")
        # would also eat ".md" appearing elsewhere in the path (for example
        # "claude.md-tips.md"), yielding a target nobody links to and a
        # false orphan report.
        if link_target.endswith(".md"):
            link_target = link_target[: -len(".md")]
        if count_inbound_links(link_target) == 0:
            issues.append({
                "severity": "warning",
                "check": "orphan_page",
                "file": str(rel),
                "detail": f"Orphan page: no other articles link to [[{link_target}]]",
            })
    return issues
def check_orphan_sources() -> list[dict]:
    """Flag raw daily logs that have no entry in the state's "ingested" map.

    A log is matched by file name only; every unmatched log yields one
    ``warning``-severity issue.
    """
    already_ingested = load_state().get("ingested", {})
    return [
        {
            "severity": "warning",
            "check": "orphan_source",
            "file": f"daily/{raw.name}",
            "detail": f"Uncompiled daily log: {raw.name} has not been ingested",
        }
        for raw in list_raw_files()
        if raw.name not in already_ingested
    ]
def check_stale_articles() -> list[dict]:
    """Flag ingested daily logs whose current hash differs from the stored one.

    Logs without an entry in the state's "ingested" map are ignored here.
    A missing stored hash is treated as the empty string, so such an entry
    is reported as stale unless the file hash is also empty.
    """
    known = load_state().get("ingested", {})
    findings = []
    for raw in list_raw_files():
        name = raw.name
        if name not in known:
            continue
        recorded = known[name].get("hash", "")
        actual = file_hash(raw)
        if recorded == actual:
            continue
        findings.append({
            "severity": "warning",
            "check": "stale_article",
            "file": f"daily/{name}",
            "detail": f"Stale: {name} has changed since last compilation",
        })
    return findings
def check_missing_backlinks() -> list[dict]:
    """Check for asymmetric links: A links to B but B doesn't link to A.

    Skips any source or target under ``qa/`` or ``sources/``: Q&A articles
    intentionally reference concepts without requiring a reciprocal link
    (concepts would otherwise accumulate a backlink per question, which
    drowns real relationships). Targets under ``daily/`` are skipped as
    well, as are links whose ``<link>.md`` file does not exist under
    KNOWLEDGE_DIR.

    Link extraction on both sides goes through ``extract_wikilinks``; the
    original note says that helper is alias-aware (pipe-aliased links), but
    it is not visible from this file.

    Returns:
        A list of ``suggestion``-severity issue dicts flagged
        ``auto_fixable``, at most one per (source article, target) pair.
    """
    issues = []
    for article in list_wiki_articles():
        rel = article.relative_to(KNOWLEDGE_DIR)
        rel_str = str(rel).replace("\\", "/")
        # Skip one-way source categories before touching the file contents.
        if rel_str.startswith(("qa/", "sources/")):
            continue
        # Strip only the trailing extension; replace(".md", "") would also
        # remove ".md" occurring in the middle of a name and make every
        # backlink comparison for that article fail.
        source_link = rel_str[: -len(".md")] if rel_str.endswith(".md") else rel_str
        content = article.read_text(encoding="utf-8")
        seen = set()
        for link in extract_wikilinks(content):
            # An article that mentions the same target several times should
            # produce one suggestion, not one per mention.
            if link in seen:
                continue
            seen.add(link)
            if link.startswith(("daily/", "qa/", "sources/")):
                continue
            target_path = KNOWLEDGE_DIR / f"{link}.md"
            if not target_path.exists():
                continue
            target_backlinks = extract_wikilinks(target_path.read_text(encoding="utf-8"))
            if source_link not in target_backlinks:
                issues.append({
                    "severity": "suggestion",
                    "check": "missing_backlink",
                    "file": str(rel),
                    "detail": f"[[{source_link}]] links to [[{link}]] but not vice versa",
                    "auto_fixable": True,
                })
    return issues
def check_sparse_articles(min_words: int = 200) -> list[dict]:
    """Check for articles with fewer than ``min_words`` words.

    Args:
        min_words: Word-count threshold below which an article is reported.
            Defaults to 200, the value previously hard-coded in both the
            comparison and the message.

    Returns:
        A list of ``suggestion``-severity issue dicts, one per article whose
        ``get_article_word_count`` result is below the threshold.
    """
    issues = []
    for article in list_wiki_articles():
        word_count = get_article_word_count(article)
        if word_count < min_words:
            rel = article.relative_to(KNOWLEDGE_DIR)
            issues.append({
                "severity": "suggestion",
                "check": "sparse_article",
                "file": str(rel),
                "detail": f"Sparse article: {word_count} words (minimum recommended: {min_words})",
            })
    return issues
def _parse_contradiction_response(response: str) -> list[dict]:
    """Turn the model's reply into issue dicts, one per well-formed finding line."""
    issues = []
    for raw_line in response.splitlines():
        line = raw_line.strip()
        # Only lines in the requested format count. A bare "NO_ISSUES" reply
        # matches neither prefix, so it naturally yields an empty list.
        if line.startswith(("CONTRADICTION:", "INCONSISTENCY:")):
            issues.append({
                "severity": "warning",
                "check": "contradiction",
                "file": "(cross-article)",
                "detail": line,
            })
    return issues


async def check_contradictions() -> list[dict]:
    """Use LLM to detect contradictions across articles.

    Sends the output of ``read_all_wiki_content()`` to the model named by
    LINT_MODEL through ``claude_agent_sdk.query`` (no tools allowed, at most
    two turns, working directory ROOT_DIR) and parses the text reply.

    Returns:
        One ``warning``-severity issue per ``CONTRADICTION:`` /
        ``INCONSISTENCY:`` line in the reply. If the SDK call raises, a
        single ``error``-severity issue describing the failure is returned
        instead, so the rest of the lint run can continue.
    """
    # Imported lazily so structural-only runs never need the SDK.
    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        TextBlock,
        query,
    )
    wiki_content = read_all_wiki_content()
    prompt = f"""Review this knowledge base for contradictions, inconsistencies, or
conflicting claims across articles.
## Knowledge Base
{wiki_content}
## Instructions
Look for:
- Direct contradictions (article A says X, article B says not-X)
- Inconsistent recommendations (different articles recommend conflicting approaches)
- Outdated information that conflicts with newer entries
For each issue found, output EXACTLY one line in this format:
CONTRADICTION: [file1] vs [file2] - description of the conflict
INCONSISTENCY: [file] - description of the inconsistency
If no issues found, output exactly: NO_ISSUES
Do NOT output anything else - no preamble, no explanation, just the formatted lines."""
    chunks = []
    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(ROOT_DIR),
                model=LINT_MODEL,
                allowed_tools=[],
                max_turns=2,
            ),
        ):
            if isinstance(message, AssistantMessage):
                for block in message.content:
                    if isinstance(block, TextBlock):
                        chunks.append(block.text)
    except Exception as e:
        # Deliberate best-effort boundary: surface the failure in the report
        # rather than aborting the whole lint run.
        return [{"severity": "error", "check": "contradiction", "file": "(system)", "detail": f"LLM check failed: {e}"}]
    # Parse unconditionally. Gating on '"NO_ISSUES" not in response' would
    # discard every real finding whenever that token appeared anywhere in
    # the reply (e.g. quoted inside a description or appended after findings).
    return _parse_contradiction_response("".join(chunks))
def generate_report(all_issues: list[dict]) -> str:
    """Render the collected issues as a markdown report string.

    The report has a dated title, per-severity totals, and one section per
    non-empty severity (errors, warnings, suggestions, in that order). When
    there are no issues at all, a single "all passed" line is emitted.
    """
    # Bucket issues by severity, preserving their original order.
    by_level = {
        level: [item for item in all_issues if item["severity"] == level]
        for level in ("error", "warning", "suggestion")
    }
    error_count = len(by_level["error"])
    warning_count = len(by_level["warning"])
    suggestion_count = len(by_level["suggestion"])

    out = [
        f"# Lint Report - {today_iso()}",
        "",
        f"**Total issues:** {len(all_issues)}",
        f"- Errors: {error_count}",
        f"- Warnings: {warning_count}",
        f"- Suggestions: {suggestion_count}",
        "",
    ]

    sections = (
        ("Errors", "error", "x"),
        ("Warnings", "warning", "!"),
        ("Suggestions", "suggestion", "?"),
    )
    for heading, level, glyph in sections:
        bucket = by_level[level]
        if not bucket:
            continue
        out += [f"## {heading}", ""]
        for entry in bucket:
            suffix = " (auto-fixable)" if entry.get("auto_fixable") else ""
            out.append(f"- **[{glyph}]** `{entry['file']}` - {entry['detail']}{suffix}")
        out.append("")

    if not all_issues:
        out += ["All checks passed. Knowledge base is healthy.", ""]
    return "\n".join(out)
def main():
    """Run the lint checks, write the report, update state, and return an exit code.

    Parses ``--structural-only`` from the command line; when it is given the
    LLM contradiction check is skipped. The report is written to
    ``REPORTS_DIR / lint-<today>.md`` and ``last_lint`` is stored in the
    state. Returns 1 if any ``error``-severity issue was found, else 0.
    """
    cli = argparse.ArgumentParser(description="Lint the knowledge base")
    cli.add_argument(
        "--structural-only",
        action="store_true",
        help="Skip LLM-based checks (contradictions) - faster and free",
    )
    opts = cli.parse_args()

    print("Running knowledge base lint checks...")

    # Structural checks run first: they need no model call.
    structural_checks = (
        ("Broken links", check_broken_links),
        ("Orphan pages", check_orphan_pages),
        ("Orphan sources", check_orphan_sources),
        ("Stale articles", check_stale_articles),
        ("Missing backlinks", check_missing_backlinks),
        ("Sparse articles", check_sparse_articles),
    )
    collected: list[dict] = []
    for label, run_check in structural_checks:
        print(f" Checking: {label}...")
        found = run_check()
        collected.extend(found)
        print(f" Found {len(found)} issue(s)")

    # The contradiction check calls the model, so it is opt-out.
    if opts.structural_only:
        print(" Skipping: Contradictions (--structural-only)")
    else:
        print(" Checking: Contradictions (LLM)...")
        found = asyncio.run(check_contradictions())
        collected.extend(found)
        print(f" Found {len(found)} issue(s)")

    # Persist the report.
    rendered = generate_report(collected)
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    destination = REPORTS_DIR / f"lint-{today_iso()}.md"
    destination.write_text(rendered, encoding="utf-8")
    print(f"\nReport saved to: {destination}")

    # Record when the lint last ran.
    persisted = load_state()
    persisted["last_lint"] = now_iso()
    save_state(persisted)

    # Console summary and exit code.
    tally = {"error": 0, "warning": 0, "suggestion": 0}
    for item in collected:
        level = item["severity"]
        if level in tally:
            tally[level] += 1
    print(
        f"\nResults: {tally['error']} errors, {tally['warning']} warnings, "
        f"{tally['suggestion']} suggestions"
    )
    if tally["error"] > 0:
        print("\nErrors found - knowledge base needs attention!")
        return 1
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. The bare
    # exit() helper is injected by the `site` module for interactive use and
    # is missing under `python -S` or in frozen builds; raising SystemExit
    # always works and needs no extra import.
    raise SystemExit(main())