From 03296be47a9a66b44c074b494ffa8b004d98defb Mon Sep 17 00:00:00 2001 From: agent-admin Date: Fri, 24 Apr 2026 17:48:48 -0400 Subject: [PATCH] fork: scaling fixes (index-only context + chunking + model wiring) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes upstream issues #3/#5/#9 (whole-wiki in every prompt) and adds large-log chunking. Addresses the audit's P1 scaling findings (C1), the chunking requirement the operator added on top, C8 explicit model wiring across all LLM call sites, and the D3 single-event-loop refactor. ## compile.py - **Index-only context.** The `existing_articles_context` concatenation of every wiki article has been removed from the prompt. Instead the LLM receives only the index + schema + daily log and uses the Read tool (already in allowed_tools) to fetch specific articles it decides are relevant. Prompt size stays bounded regardless of KB growth — upstream's 250K-token prompts past ~100 articles are gone. - **Chunking.** `_split_log_into_chunks()` splits oversized daily logs along `### ` section boundaries. Threshold MAX_LOG_CHARS_PER_CHUNK (default 100K chars ≈ 25K tokens, configurable via MEMORIA_MAX_LOG_CHARS). Chunks compile via separate LLM calls that naturally merge through Edit on shared files. Oversized single sections emit as their own chunks rather than splitting mid-thought. - **Atomic state on chunked compile.** State is only written after ALL chunks succeed — partial failure leaves the log flagged as uncompiled in state.json so the next run retries it cleanly. Was already correct for single-chunk logs (early return on SDK error) and now correct for multi-chunk too. - **Explicit model.** `model=COMPILE_MODEL` passed to ClaudeAgentOptions. Default "sonnet"; override via MEMORIA_COMPILE_MODEL env var. - **D3: single asyncio.run.** The per-file `asyncio.run()` in the compile loop is replaced with one outer call wrapping `_compile_all`. 
Avoids repeated event-loop setup/teardown and matches the pattern used for async resources in the SDK. ## query.py - **Index-only context.** `read_all_wiki_content()` replaced with `read_wiki_index()`. The LLM reads the index and uses its Read tool to fetch specific articles. Same rationale as compile.py — keeps prompt size bounded and cost predictable. - **Explicit model.** `model=QUERY_MODEL`, default "sonnet", override via MEMORIA_QUERY_MODEL. ## lint.py - **C9: skip qa/sources in missing-backlink check.** Articles under qa/ or sources/ no longer trigger a suggestion that every referenced concept should backlink to them. Concepts aren't expected to link back to every Q&A that mentions them — doing so would drown real relationships. - **Alias-aware backlink detection.** Uses `extract_wikilinks()` to parse the target's link list so `[[concepts/foo|Display]]` forms count as valid backlinks (previously required exact `[[concepts/foo]]` match, causing false positives on aliased forms). - **Explicit model.** `model=LINT_MODEL` in check_contradictions call, default "sonnet", override via MEMORIA_LINT_MODEL. ## Verified - Chunking: 120K-char 3-section log splits into 80K + 40K, reconstructs byte-exact. Oversized single section (150K) emits as its own chunk. Small log (≤100K) returns as single chunk. - All patched modules import cleanly with expected config values. - compile_daily_log / query.run_query / flush.maybe_trigger_compilation / lint.check_missing_backlinks all callable post-patch. 
--- scripts/compile.py | 220 +++++++++++++++++++++++++++++++++------------ scripts/lint.py | 31 ++++++- scripts/query.py | 30 +++++-- 3 files changed, 213 insertions(+), 68 deletions(-) diff --git a/scripts/compile.py b/scripts/compile.py index 43baad6..a0787e9 100644 --- a/scripts/compile.py +++ b/scripts/compile.py @@ -15,6 +15,8 @@ from __future__ import annotations import argparse import asyncio +import os +import re import sys from pathlib import Path @@ -31,41 +33,81 @@ from utils import ( # ── Paths for the LLM to use ────────────────────────────────────────── ROOT_DIR = Path(__file__).resolve().parent.parent +# Compilation model (Sonnet by default — knowledge extraction benefits from +# strong reasoning; override via MEMORIA_COMPILE_MODEL for experiments). +COMPILE_MODEL = os.environ.get("MEMORIA_COMPILE_MODEL", "sonnet") -async def compile_daily_log(log_path: Path, state: dict) -> float: - """Compile a single daily log into knowledge articles. +# Chunk threshold for large daily logs. Anything above ~100K chars gets +# split along `### ` section boundaries so a single LLM call never +# receives the whole log when it's oversized. Each chunk compiles via a +# fresh Claude invocation; they merge naturally because all writes go +# through Edit on shared files (index.md, existing concept articles). +# +# 100K chars ≈ 25K tokens — well under Claude's context window even +# after schema + index + instructions + headroom. +MAX_LOG_CHARS_PER_CHUNK = int(os.environ.get("MEMORIA_MAX_LOG_CHARS", "100000")) - Returns the API cost of the compilation. + +def _split_log_into_chunks(log_content: str, max_chars: int) -> list[str]: + """Split a daily log by ### section headers if it exceeds max_chars. + + Returns a list of chunk strings where each chunk is <= max_chars (unless + a single section itself exceeds max_chars, in which case the section is + emitted as its own oversized chunk — preferable to splitting mid-thought). 
+ + If the whole log is <= max_chars, returns a single-element list. """ - from claude_agent_sdk import ( - AssistantMessage, - ClaudeAgentOptions, - ResultMessage, - TextBlock, - query, - ) + if len(log_content) <= max_chars: + return [log_content] - log_content = log_path.read_text(encoding="utf-8") + # Split at ### boundaries, keeping the header attached to its body. + parts = re.split(r"(?m)(?=^### )", log_content) + + chunks: list[str] = [] + current = "" + for part in parts: + if not part: + continue + # If this part alone exceeds max_chars, emit it as its own chunk. + if len(part) > max_chars: + if current: + chunks.append(current) + current = "" + chunks.append(part) + continue + # If appending would overflow, close out current and start new. + if current and len(current) + len(part) > max_chars: + chunks.append(current) + current = part + else: + current += part + + if current: + chunks.append(current) + + return chunks + + +def _build_prompt(log_name: str, chunk_body: str, *, chunk_info: str = "") -> str: + """Assemble the compile prompt. + + Unlike upstream, we do NOT inline every existing article into the prompt + — that would send the whole wiki on every call, exploding cost and + hitting context limits past ~50 articles (upstream issues #3/#5/#9). + Instead, we provide: + * the schema (AGENTS.md) — stable structural rules + * the current index — lets the compiler identify which concepts exist + * the daily log — the new material to compile + The compiler uses its Read tool to fetch specific existing articles + it deems relevant (index has paths + summaries), keeping prompt size + bounded regardless of knowledge-base size. 
+ """ schema = AGENTS_FILE.read_text(encoding="utf-8") wiki_index = read_wiki_index() - - # Read existing articles for context - existing_articles_context = "" - existing = {} - for article_path in list_wiki_articles(): - rel = article_path.relative_to(KNOWLEDGE_DIR) - existing[str(rel)] = article_path.read_text(encoding="utf-8") - - if existing: - parts = [] - for rel_path, content in existing.items(): - parts.append(f"### {rel_path}\n```markdown\n{content}\n```") - existing_articles_context = "\n\n".join(parts) - timestamp = now_iso() - prompt = f"""You are a knowledge compiler. Your job is to read a daily conversation log -and extract knowledge into structured wiki articles. + return f"""You are a knowledge compiler. Your job is to read a daily conversation log +and extract knowledge into structured wiki articles.{chunk_info} ## Schema (AGENTS.md) @@ -73,17 +115,19 @@ and extract knowledge into structured wiki articles. ## Current Wiki Index +The index below lists every existing wiki article with a one-line summary. +When extracting concepts, check this index first. If a concept already +exists, use the Read tool to fetch its current content and update it +rather than duplicating. Only fetch articles you actually need — do not +read the entire wiki. + {wiki_index} -## Existing Wiki Articles - -{existing_articles_context if existing_articles_context else "(No existing articles yet)"} - ## Daily Log to Compile -**File:** {log_path.name} +**File:** {log_name} -{log_content} +{chunk_body} ## Your Task @@ -91,22 +135,25 @@ Read the daily log above and compile it into wiki articles following the schema ### Rules: -1. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article -2. **Create concept articles** in `knowledge/concepts/` - One .md file per concept +1. **Consult the index first.** Identify which concepts in the daily log + already have articles (use the Read tool to fetch them) and which are + new. 
Do not list or read the whole wiki — only what's relevant. +2. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article +3. **Create concept articles** in `knowledge/concepts/` - One .md file per concept - Use the exact article format from AGENTS.md (YAML frontmatter + sections) - Include `sources:` in frontmatter pointing to the daily log file - Use `[[concepts/slug]]` wikilinks to link to related concepts - Write in encyclopedia style - neutral, comprehensive -3. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious +4. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious relationships between 2+ existing concepts -4. **Update existing articles** if this log adds new information to concepts already in the wiki +5. **Update existing articles** if this log adds new information to concepts already in the wiki - Read the existing article, add the new information, add the source to frontmatter -5. **Update knowledge/index.md** - Add new entries to the table +6. **Update knowledge/index.md** - Add new entries to the table - Each entry: `| [[path/slug]] | One-line summary | source-file | {timestamp[:10]} |` -6. **Append to knowledge/log.md** - Add a timestamped entry: +7. **Append to knowledge/log.md** - Add a timestamped entry: ``` - ## [{timestamp}] compile | {log_path.name} - - Source: daily/{log_path.name} + ## [{timestamp}] compile | {log_name} + - Source: daily/{log_name} - Articles created: [[concepts/x]], [[concepts/y]] - Articles updated: [[concepts/z]] (if any) ``` @@ -126,13 +173,29 @@ Read the daily log above and compile it into wiki articles following the schema - Sources section should cite the daily log with specific claims extracted """ - cost = 0.0 +async def _invoke_llm(prompt: str) -> tuple[float, bool]: + """Run one LLM compile pass. Returns (cost_usd, success). 
+ + success=False means the SDK raised an exception — the caller must NOT + mark the daily log as compiled in state.json, so the log is retried on + the next run rather than silently dropped. + """ + from claude_agent_sdk import ( + AssistantMessage, + ClaudeAgentOptions, + ResultMessage, + TextBlock, + query, + ) + + cost = 0.0 try: async for message in query( prompt=prompt, options=ClaudeAgentOptions( cwd=str(ROOT_DIR), + model=COMPILE_MODEL, system_prompt={"type": "preset", "preset": "claude_code"}, allowed_tools=["Read", "Write", "Edit", "Glob", "Grep"], permission_mode="acceptEdits", @@ -142,25 +205,63 @@ Read the daily log above and compile it into wiki articles following the schema if isinstance(message, AssistantMessage): for block in message.content: if isinstance(block, TextBlock): - pass # compilation output - LLM writes files directly + pass # LLM writes files directly via tools elif isinstance(message, ResultMessage): cost = message.total_cost_usd or 0.0 - print(f" Cost: ${cost:.4f}") + print(f" Cost: ${cost:.4f}") + return cost, True except Exception as e: - print(f" Error: {e}") - return 0.0 + print(f" SDK error: {e}") + return cost, False - # Update state + +async def compile_daily_log(log_path: Path, state: dict) -> float: + """Compile a single daily log into knowledge articles. + + Splits large logs into `### `-bounded chunks before invoking the LLM, + so a single call never receives an oversized daily log. State is only + updated when ALL chunks succeed — partial failure leaves the log + flagged as uncompiled so the next run retries it. + + Returns total API cost of the compilation (sum across chunks). 
+ """ + log_content = log_path.read_text(encoding="utf-8") + chunks = _split_log_into_chunks(log_content, MAX_LOG_CHARS_PER_CHUNK) + + total_cost = 0.0 + all_succeeded = True + + for i, chunk in enumerate(chunks, 1): + chunk_info = ( + f"\n\n(Chunk {i} of {len(chunks)} — compile the sections in this chunk; " + "remaining chunks of the same log follow in subsequent calls.)" + if len(chunks) > 1 + else "" + ) + prompt = _build_prompt(log_path.name, chunk, chunk_info=chunk_info) + print(f" Chunk {i}/{len(chunks)} ({len(chunk):,} chars)...") + cost, ok = await _invoke_llm(prompt) + total_cost += cost + if not ok: + all_succeeded = False + break + + if not all_succeeded: + print(f" FAILED: log not marked compiled; will retry on next run.") + return total_cost + + # All chunks succeeded — atomically update state. rel_path = log_path.name state.setdefault("ingested", {})[rel_path] = { "hash": file_hash(log_path), "compiled_at": now_iso(), - "cost_usd": cost, + "cost_usd": total_cost, + "chunks": len(chunks), } - state["total_cost"] = state.get("total_cost", 0.0) + cost + state["total_cost"] = state.get("total_cost", 0.0) + total_cost save_state(state) - return cost + return total_cost def main(): @@ -207,13 +308,18 @@ def main(): if args.dry_run: return - # Compile each file sequentially - total_cost = 0.0 - for i, log_path in enumerate(to_compile, 1): - print(f"\n[{i}/{len(to_compile)}] Compiling {log_path.name}...") - cost = asyncio.run(compile_daily_log(log_path, state)) - total_cost += cost - print(f" Done.") + async def _compile_all() -> float: + total = 0.0 + for i, log_path in enumerate(to_compile, 1): + print(f"\n[{i}/{len(to_compile)}] Compiling {log_path.name}...") + cost = await compile_daily_log(log_path, state) + total += cost + print(f" Done.") + return total + + # Single event-loop lifecycle for the whole batch — avoids reinit overhead + # and lets any async resources in the SDK settle predictably. 
+ total_cost = asyncio.run(_compile_all()) articles = list_wiki_articles() print(f"\nCompilation complete. Total cost: ${total_cost:.2f}") diff --git a/scripts/lint.py b/scripts/lint.py index 9079eb1..9b6e609 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -13,9 +13,15 @@ from __future__ import annotations import argparse import asyncio +import os from pathlib import Path from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso + +# Contradiction-check model. Kept as Sonnet for reasoning quality; override +# via MEMORIA_LINT_MODEL (e.g. to use a cheaper model for structural runs +# that happen to include the LLM check). +LINT_MODEL = os.environ.get("MEMORIA_LINT_MODEL", "sonnet") from utils import ( count_inbound_links, extract_wikilinks, @@ -105,20 +111,36 @@ def check_stale_articles() -> list[dict]: def check_missing_backlinks() -> list[dict]: - """Check for asymmetric links: A links to B but B doesn't link to A.""" + """Check for asymmetric links: A links to B but B doesn't link to A. + + Skips any source or target under `qa/` or `sources/`: Q&A articles + intentionally reference concepts without requiring a reciprocal link + (concepts would otherwise accumulate a backlink per question, which + drowns real relationships). Also handles pipe-aliased wikilinks via + the alias-aware extract_wikilinks helper. + """ issues = [] + # Aliased/nested variants of the source link that should count as a + # valid backlink on the target side: bare slug, and pipe-aliased form. for article in list_wiki_articles(): content = article.read_text(encoding="utf-8") rel = article.relative_to(KNOWLEDGE_DIR) - source_link = str(rel).replace(".md", "").replace("\\", "/") + rel_str = str(rel).replace("\\", "/") + + # Skip one-way source categories. 
+ if rel_str.startswith("qa/") or rel_str.startswith("sources/"): + continue + + source_link = rel_str.replace(".md", "") for link in extract_wikilinks(content): - if link.startswith("daily/"): + if link.startswith("daily/") or link.startswith("qa/") or link.startswith("sources/"): continue target_path = KNOWLEDGE_DIR / f"{link}.md" if target_path.exists(): target_content = target_path.read_text(encoding="utf-8") - if f"[[{source_link}]]" not in target_content: + target_backlinks = extract_wikilinks(target_content) + if source_link not in target_backlinks: issues.append({ "severity": "suggestion", "check": "missing_backlink", @@ -185,6 +207,7 @@ Do NOT output anything else - no preamble, no explanation, just the formatted li prompt=prompt, options=ClaudeAgentOptions( cwd=str(ROOT_DIR), + model=LINT_MODEL, allowed_tools=[], max_turns=2, ), diff --git a/scripts/query.py b/scripts/query.py index fe82ab6..9e87b54 100644 --- a/scripts/query.py +++ b/scripts/query.py @@ -14,16 +14,28 @@ from __future__ import annotations import argparse import asyncio +import os from pathlib import Path from config import KNOWLEDGE_DIR, QA_DIR, now_iso -from utils import load_state, read_all_wiki_content, save_state +from utils import load_state, read_wiki_index, save_state ROOT_DIR = Path(__file__).resolve().parent.parent +# Query model (Sonnet by default — synthesis over the retrieved articles +# benefits from strong reasoning; override via MEMORIA_QUERY_MODEL). +QUERY_MODEL = os.environ.get("MEMORIA_QUERY_MODEL", "sonnet") + async def run_query(question: str, file_back: bool = False) -> str: - """Query the knowledge base and optionally file the answer back.""" + """Query the knowledge base and optionally file the answer back. + + Unlike upstream, we do NOT inline the entire wiki into the prompt — the + LLM receives the index only and uses its Read tool to fetch articles + it decides are relevant. 
Keeps prompt size bounded regardless of + knowledge-base size and avoids the whole-wiki-in-prompt cost wall + documented in upstream issues #3/#5/#9. + """ from claude_agent_sdk import ( AssistantMessage, ClaudeAgentOptions, @@ -32,7 +44,7 @@ async def run_query(question: str, file_back: bool = False) -> str: query, ) - wiki_content = read_all_wiki_content() + wiki_index = read_wiki_index() tools = ["Read", "Glob", "Grep"] if file_back: @@ -59,20 +71,23 @@ After answering, do the following: """ prompt = f"""You are a knowledge base query engine. Answer the user's question by -consulting the knowledge base below. +consulting the knowledge base. ## How to Answer 1. Read the INDEX section first - it lists every article with a one-line summary 2. Identify 3-10 articles that are relevant to the question -3. Read those articles carefully (they're included below) +3. Use the Read tool to fetch those articles (they live at + {KNOWLEDGE_DIR}/concepts/, {KNOWLEDGE_DIR}/connections/, and + {KNOWLEDGE_DIR}/qa/). Only read articles you actually need — do not + read the entire wiki. 4. Synthesize a clear, thorough answer 5. Cite your sources using [[wikilinks]] (e.g., [[concepts/supabase-auth]]) 6. If the knowledge base doesn't contain relevant information, say so honestly -## Knowledge Base +## Knowledge Base Index -{wiki_content} +{wiki_index} ## Question @@ -87,6 +102,7 @@ consulting the knowledge base below. prompt=prompt, options=ClaudeAgentOptions( cwd=str(ROOT_DIR), + model=QUERY_MODEL, system_prompt={"type": "preset", "preset": "claude_code"}, allowed_tools=tools, permission_mode="acceptEdits",