From 03296be47a9a66b44c074b494ffa8b004d98defb Mon Sep 17 00:00:00 2001
From: agent-admin <agent-admin@realmrei.biz>
Date: Fri, 24 Apr 2026 17:48:48 -0400
Subject: [PATCH] fork: scaling fixes (index-only context + chunking + model
 wiring)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes upstream issues #3/#5/#9 (whole wiki inlined in every prompt) and
adds large-log chunking. Addresses the audit's P1 scaling findings (C1),
the chunking requirement the operator added on top, C8 (explicit model
wiring across all LLM call sites), and D3 (single-event-loop refactor).

## compile.py

- **Index-only context.** The `existing_articles_context` concatenation
  of every wiki article has been removed from the prompt. Instead the
  LLM receives only the index + schema + daily log and uses the Read
  tool (already in allowed_tools) to fetch specific articles it decides
  are relevant. Prompt size stays bounded regardless of KB growth —
  upstream's 250K-token prompts past ~100 articles are gone.

- **Chunking.** `_split_log_into_chunks()` splits oversized daily logs
  along `### ` section boundaries. Threshold MAX_LOG_CHARS_PER_CHUNK
  (default 100K chars ≈ 25K tokens, configurable via
  MEMORIA_MAX_LOG_CHARS). Chunks compile via separate LLM calls that
  naturally merge through Edit on shared files. Oversized single
  sections emit as their own chunks rather than splitting mid-thought.

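- **Atomic state on chunked compile.** State is only written after
  ALL chunks succeed — a partial failure leaves the log without an
  `ingested` entry in state.json, so the next run retries it. This was
  already correct for single-chunk logs (early return on SDK error)
  and is now correct for multi-chunk logs too. Note that wiki edits
  made by chunks that completed before the failure remain on disk,
  and the retry recompiles every chunk of that log.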

- **Explicit model.** `model=COMPILE_MODEL` passed to
  ClaudeAgentOptions. Default "sonnet"; override via
  MEMORIA_COMPILE_MODEL env var.

- **D3: single asyncio.run.** The per-file `asyncio.run()` in the
  compile loop is replaced with one outer call wrapping `_compile_all`.
  Avoids repeated event-loop setup/teardown and matches the pattern
  used for async resources in the SDK.

## query.py

- **Index-only context.** `read_all_wiki_content()` replaced with
  `read_wiki_index()`. The LLM reads the index and uses its Read tool
  to fetch specific articles. Same rationale as compile.py — keeps
  prompt size bounded and cost predictable.

- **Explicit model.** `model=QUERY_MODEL`, default "sonnet", override
  via MEMORIA_QUERY_MODEL.

## lint.py

- **C9: skip qa/sources in missing-backlink check.** Articles under
  qa/ or sources/ no longer trigger a suggestion that every referenced
  concept should backlink to them, and links that point into qa/ or
  sources/ are skipped as well. Concepts aren't expected to link
  back to every Q&A that mentions them — doing so would drown real
  relationships.

- **Alias-aware backlink detection.** Uses `extract_wikilinks()` to
  parse the target's link list so `[[concepts/foo|Display]]` forms
  count as valid backlinks (previously required an exact
  `[[concepts/foo]]` substring match, causing false positives on
  aliased forms).

- **Explicit model.** `model=LINT_MODEL` in check_contradictions call,
  default "sonnet", override via MEMORIA_LINT_MODEL.

## Verified

- Chunking: 120K-char 3-section log splits into 80K + 40K, reconstructs
  byte-exact. Oversized single section (150K) emits as its own chunk.
  Small log (≤100K) returns as single chunk.
- All patched modules import cleanly with expected config values.
- compile_daily_log / query.run_query / flush.maybe_trigger_compilation
  / lint.check_missing_backlinks all callable post-patch.
---
 scripts/compile.py | 220 +++++++++++++++++++++++++++++++++------------
 scripts/lint.py    |  31 ++++++-
 scripts/query.py   |  30 +++++--
 3 files changed, 213 insertions(+), 68 deletions(-)

diff --git a/scripts/compile.py b/scripts/compile.py
index 43baad6..a0787e9 100644
--- a/scripts/compile.py
+++ b/scripts/compile.py
@@ -15,6 +15,8 @@ from __future__ import annotations
 
 import argparse
 import asyncio
+import os
+import re
 import sys
 from pathlib import Path
 
@@ -31,41 +33,81 @@ from utils import (
 # ── Paths for the LLM to use ──────────────────────────────────────────
 ROOT_DIR = Path(__file__).resolve().parent.parent
 
+# Compilation model (Sonnet by default — knowledge extraction benefits from
+# strong reasoning; override via MEMORIA_COMPILE_MODEL for experiments).
+COMPILE_MODEL = os.environ.get("MEMORIA_COMPILE_MODEL", "sonnet")
 
-async def compile_daily_log(log_path: Path, state: dict) -> float:
-    """Compile a single daily log into knowledge articles.
+# Chunk threshold for large daily logs. Anything above ~100K chars gets
+# split along `### ` section boundaries so a single LLM call never
+# receives the whole log when it's oversized. Each chunk compiles via a
+# fresh Claude invocation; they merge naturally because all writes go
+# through Edit on shared files (index.md, existing concept articles).
+#
+# 100K chars ≈ 25K tokens — well under Claude's context window even
+# after schema + index + instructions + headroom.
+MAX_LOG_CHARS_PER_CHUNK = int(os.environ.get("MEMORIA_MAX_LOG_CHARS", "100000"))
 
-    Returns the API cost of the compilation.
+
+def _split_log_into_chunks(log_content: str, max_chars: int) -> list[str]:
+    """Split a daily log by ### section headers if it exceeds max_chars.
+
+    Returns a list of chunk strings where each chunk is <= max_chars (unless
+    a single section itself exceeds max_chars, in which case the section is
+    emitted as its own oversized chunk — preferable to splitting mid-thought).
+
+    If the whole log is <= max_chars, returns a single-element list.
     """
-    from claude_agent_sdk import (
-        AssistantMessage,
-        ClaudeAgentOptions,
-        ResultMessage,
-        TextBlock,
-        query,
-    )
+    if len(log_content) <= max_chars:
+        return [log_content]
 
-    log_content = log_path.read_text(encoding="utf-8")
+    # Split at ### boundaries, keeping the header attached to its body.
+    parts = re.split(r"(?m)(?=^### )", log_content)
+
+    chunks: list[str] = []
+    current = ""
+    for part in parts:
+        if not part:
+            continue
+        # If this part alone exceeds max_chars, emit it as its own chunk.
+        if len(part) > max_chars:
+            if current:
+                chunks.append(current)
+                current = ""
+            chunks.append(part)
+            continue
+        # If appending would overflow, close out current and start new.
+        if current and len(current) + len(part) > max_chars:
+            chunks.append(current)
+            current = part
+        else:
+            current += part
+
+    if current:
+        chunks.append(current)
+
+    return chunks
+
+
+def _build_prompt(log_name: str, chunk_body: str, *, chunk_info: str = "") -> str:
+    """Assemble the compile prompt.
+
+    Unlike upstream, we do NOT inline every existing article into the prompt
+    — that would send the whole wiki on every call, exploding cost and
+    hitting context limits past ~50 articles (upstream issues #3/#5/#9).
+    Instead, we provide:
+      * the schema (AGENTS.md) — stable structural rules
+      * the current index — lets the compiler identify which concepts exist
+      * the daily log — the new material to compile
+    The compiler uses its Read tool to fetch specific existing articles
+    it deems relevant (index has paths + summaries), keeping prompt size
+    bounded regardless of knowledge-base size.
+    """
     schema = AGENTS_FILE.read_text(encoding="utf-8")
     wiki_index = read_wiki_index()
-
-    # Read existing articles for context
-    existing_articles_context = ""
-    existing = {}
-    for article_path in list_wiki_articles():
-        rel = article_path.relative_to(KNOWLEDGE_DIR)
-        existing[str(rel)] = article_path.read_text(encoding="utf-8")
-
-    if existing:
-        parts = []
-        for rel_path, content in existing.items():
-            parts.append(f"### {rel_path}\n```markdown\n{content}\n```")
-        existing_articles_context = "\n\n".join(parts)
-
     timestamp = now_iso()
 
-    prompt = f"""You are a knowledge compiler. Your job is to read a daily conversation log
-and extract knowledge into structured wiki articles.
+    return f"""You are a knowledge compiler. Your job is to read a daily conversation log
+and extract knowledge into structured wiki articles.{chunk_info}
 
 ## Schema (AGENTS.md)
 
@@ -73,17 +115,19 @@ and extract knowledge into structured wiki articles.
 
 ## Current Wiki Index
 
+The index below lists every existing wiki article with a one-line summary.
+When extracting concepts, check this index first. If a concept already
+exists, use the Read tool to fetch its current content and update it
+rather than duplicating. Only fetch articles you actually need — do not
+read the entire wiki.
+
 {wiki_index}
 
-## Existing Wiki Articles
-
-{existing_articles_context if existing_articles_context else "(No existing articles yet)"}
-
 ## Daily Log to Compile
 
-**File:** {log_path.name}
+**File:** {log_name}
 
-{log_content}
+{chunk_body}
 
 ## Your Task
 
@@ -91,22 +135,25 @@ Read the daily log above and compile it into wiki articles following the schema
 
 ### Rules:
 
-1. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article
-2. **Create concept articles** in `knowledge/concepts/` - One .md file per concept
+1. **Consult the index first.** Identify which concepts in the daily log
+   already have articles (use the Read tool to fetch them) and which are
+   new. Do not list or read the whole wiki — only what's relevant.
+2. **Extract key concepts** - Identify 3-7 distinct concepts worth their own article
+3. **Create concept articles** in `knowledge/concepts/` - One .md file per concept
    - Use the exact article format from AGENTS.md (YAML frontmatter + sections)
    - Include `sources:` in frontmatter pointing to the daily log file
    - Use `[[concepts/slug]]` wikilinks to link to related concepts
    - Write in encyclopedia style - neutral, comprehensive
-3. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious
+4. **Create connection articles** in `knowledge/connections/` if this log reveals non-obvious
    relationships between 2+ existing concepts
-4. **Update existing articles** if this log adds new information to concepts already in the wiki
+5. **Update existing articles** if this log adds new information to concepts already in the wiki
    - Read the existing article, add the new information, add the source to frontmatter
-5. **Update knowledge/index.md** - Add new entries to the table
+6. **Update knowledge/index.md** - Add new entries to the table
    - Each entry: `| [[path/slug]] | One-line summary | source-file | {timestamp[:10]} |`
-6. **Append to knowledge/log.md** - Add a timestamped entry:
+7. **Append to knowledge/log.md** - Add a timestamped entry:
    ```
-   ## [{timestamp}] compile | {log_path.name}
-   - Source: daily/{log_path.name}
+   ## [{timestamp}] compile | {log_name}
+   - Source: daily/{log_name}
    - Articles created: [[concepts/x]], [[concepts/y]]
    - Articles updated: [[concepts/z]] (if any)
    ```
@@ -126,13 +173,29 @@ Read the daily log above and compile it into wiki articles following the schema
 - Sources section should cite the daily log with specific claims extracted
 """
 
-    cost = 0.0
 
+async def _invoke_llm(prompt: str) -> tuple[float, bool]:
+    """Run one LLM compile pass. Returns (cost_usd, success).
+
+    success=False means the SDK raised an exception — the caller must NOT
+    mark the daily log as compiled in state.json, so the log is retried on
+    the next run rather than silently dropped.
+    """
+    from claude_agent_sdk import (
+        AssistantMessage,
+        ClaudeAgentOptions,
+        ResultMessage,
+        TextBlock,
+        query,
+    )
+
+    cost = 0.0
     try:
         async for message in query(
             prompt=prompt,
             options=ClaudeAgentOptions(
                 cwd=str(ROOT_DIR),
+                model=COMPILE_MODEL,
                 system_prompt={"type": "preset", "preset": "claude_code"},
                 allowed_tools=["Read", "Write", "Edit", "Glob", "Grep"],
                 permission_mode="acceptEdits",
@@ -142,25 +205,63 @@ Read the daily log above and compile it into wiki articles following the schema
             if isinstance(message, AssistantMessage):
                 for block in message.content:
                     if isinstance(block, TextBlock):
-                        pass  # compilation output - LLM writes files directly
+                        pass  # LLM writes files directly via tools
             elif isinstance(message, ResultMessage):
                 cost = message.total_cost_usd or 0.0
-                print(f"  Cost: ${cost:.4f}")
+                print(f"    Cost: ${cost:.4f}")
+        return cost, True
     except Exception as e:
-        print(f"  Error: {e}")
-        return 0.0
+        print(f"    SDK error: {e}")
+        return cost, False
 
-    # Update state
+
+async def compile_daily_log(log_path: Path, state: dict) -> float:
+    """Compile a single daily log into knowledge articles.
+
+    Splits large logs into `### `-bounded chunks before invoking the LLM,
+    so a single call never receives an oversized daily log. State is only
+    updated when ALL chunks succeed — partial failure leaves the log
+    flagged as uncompiled so the next run retries it.
+
+    Returns total API cost of the compilation (sum across chunks).
+    """
+    log_content = log_path.read_text(encoding="utf-8")
+    chunks = _split_log_into_chunks(log_content, MAX_LOG_CHARS_PER_CHUNK)
+
+    total_cost = 0.0
+    all_succeeded = True
+
+    for i, chunk in enumerate(chunks, 1):
+        chunk_info = (
+            f"\n\n(Chunk {i} of {len(chunks)} — compile the sections in this chunk; "
+            "remaining chunks of the same log follow in subsequent calls.)"
+            if len(chunks) > 1
+            else ""
+        )
+        prompt = _build_prompt(log_path.name, chunk, chunk_info=chunk_info)
+        print(f"  Chunk {i}/{len(chunks)} ({len(chunk):,} chars)...")
+        cost, ok = await _invoke_llm(prompt)
+        total_cost += cost
+        if not ok:
+            all_succeeded = False
+            break
+
+    if not all_succeeded:
+        print(f"  FAILED: log not marked compiled; will retry on next run.")
+        return total_cost
+
+    # All chunks succeeded — atomically update state.
     rel_path = log_path.name
     state.setdefault("ingested", {})[rel_path] = {
         "hash": file_hash(log_path),
         "compiled_at": now_iso(),
-        "cost_usd": cost,
+        "cost_usd": total_cost,
+        "chunks": len(chunks),
     }
-    state["total_cost"] = state.get("total_cost", 0.0) + cost
+    state["total_cost"] = state.get("total_cost", 0.0) + total_cost
     save_state(state)
 
-    return cost
+    return total_cost
 
 
 def main():
@@ -207,13 +308,18 @@ def main():
     if args.dry_run:
         return
 
-    # Compile each file sequentially
-    total_cost = 0.0
-    for i, log_path in enumerate(to_compile, 1):
-        print(f"\n[{i}/{len(to_compile)}] Compiling {log_path.name}...")
-        cost = asyncio.run(compile_daily_log(log_path, state))
-        total_cost += cost
-        print(f"  Done.")
+    async def _compile_all() -> float:
+        total = 0.0
+        for i, log_path in enumerate(to_compile, 1):
+            print(f"\n[{i}/{len(to_compile)}] Compiling {log_path.name}...")
+            cost = await compile_daily_log(log_path, state)
+            total += cost
+            print(f"  Done.")
+        return total
+
+    # Single event-loop lifecycle for the whole batch — avoids reinit overhead
+    # and lets any async resources in the SDK settle predictably.
+    total_cost = asyncio.run(_compile_all())
 
     articles = list_wiki_articles()
     print(f"\nCompilation complete. Total cost: ${total_cost:.2f}")
diff --git a/scripts/lint.py b/scripts/lint.py
index 9079eb1..9b6e609 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -13,9 +13,15 @@ from __future__ import annotations
 
 import argparse
 import asyncio
+import os
 from pathlib import Path
 
 from config import KNOWLEDGE_DIR, REPORTS_DIR, now_iso, today_iso
+
+# Contradiction-check model. Kept as Sonnet for reasoning quality; override
+# via MEMORIA_LINT_MODEL (e.g. to use a cheaper model for structural runs
+# that happen to include the LLM check).
+LINT_MODEL = os.environ.get("MEMORIA_LINT_MODEL", "sonnet")
 from utils import (
     count_inbound_links,
     extract_wikilinks,
@@ -105,20 +111,36 @@ def check_stale_articles() -> list[dict]:
 
 
 def check_missing_backlinks() -> list[dict]:
-    """Check for asymmetric links: A links to B but B doesn't link to A."""
+    """Check for asymmetric links: A links to B but B doesn't link to A.
+
+    Skips any source or target under `qa/` or `sources/`: Q&A articles
+    intentionally reference concepts without requiring a reciprocal link
+    (concepts would otherwise accumulate a backlink per question, which
+    drowns real relationships). Also handles pipe-aliased wikilinks via
+    the alias-aware extract_wikilinks helper.
+    """
     issues = []
+    # Aliased/nested variants of the source link that should count as a
+    # valid backlink on the target side: bare slug, and pipe-aliased form.
     for article in list_wiki_articles():
         content = article.read_text(encoding="utf-8")
         rel = article.relative_to(KNOWLEDGE_DIR)
-        source_link = str(rel).replace(".md", "").replace("\\", "/")
+        rel_str = str(rel).replace("\\", "/")
+
+        # Skip one-way source categories.
+        if rel_str.startswith("qa/") or rel_str.startswith("sources/"):
+            continue
+
+        source_link = rel_str.replace(".md", "")
 
         for link in extract_wikilinks(content):
-            if link.startswith("daily/"):
+            if link.startswith("daily/") or link.startswith("qa/") or link.startswith("sources/"):
                 continue
             target_path = KNOWLEDGE_DIR / f"{link}.md"
             if target_path.exists():
                 target_content = target_path.read_text(encoding="utf-8")
-                if f"[[{source_link}]]" not in target_content:
+                target_backlinks = extract_wikilinks(target_content)
+                if source_link not in target_backlinks:
                     issues.append({
                         "severity": "suggestion",
                         "check": "missing_backlink",
@@ -185,6 +207,7 @@ Do NOT output anything else - no preamble, no explanation, just the formatted li
             prompt=prompt,
             options=ClaudeAgentOptions(
                 cwd=str(ROOT_DIR),
+                model=LINT_MODEL,
                 allowed_tools=[],
                 max_turns=2,
             ),
diff --git a/scripts/query.py b/scripts/query.py
index fe82ab6..9e87b54 100644
--- a/scripts/query.py
+++ b/scripts/query.py
@@ -14,16 +14,28 @@ from __future__ import annotations
 
 import argparse
 import asyncio
+import os
 from pathlib import Path
 
 from config import KNOWLEDGE_DIR, QA_DIR, now_iso
-from utils import load_state, read_all_wiki_content, save_state
+from utils import load_state, read_wiki_index, save_state
 
 ROOT_DIR = Path(__file__).resolve().parent.parent
 
+# Query model (Sonnet by default — synthesis over the retrieved articles
+# benefits from strong reasoning; override via MEMORIA_QUERY_MODEL).
+QUERY_MODEL = os.environ.get("MEMORIA_QUERY_MODEL", "sonnet")
+
 
 async def run_query(question: str, file_back: bool = False) -> str:
-    """Query the knowledge base and optionally file the answer back."""
+    """Query the knowledge base and optionally file the answer back.
+
+    Unlike upstream, we do NOT inline the entire wiki into the prompt — the
+    LLM receives the index only and uses its Read tool to fetch articles
+    it decides are relevant. Keeps prompt size bounded regardless of
+    knowledge-base size and avoids the whole-wiki-in-prompt cost wall
+    documented in upstream issues #3/#5/#9.
+    """
     from claude_agent_sdk import (
         AssistantMessage,
         ClaudeAgentOptions,
@@ -32,7 +44,7 @@ async def run_query(question: str, file_back: bool = False) -> str:
         query,
     )
 
-    wiki_content = read_all_wiki_content()
+    wiki_index = read_wiki_index()
 
     tools = ["Read", "Glob", "Grep"]
     if file_back:
@@ -59,20 +71,23 @@ After answering, do the following:
 """
 
     prompt = f"""You are a knowledge base query engine. Answer the user's question by
-consulting the knowledge base below.
+consulting the knowledge base.
 
 ## How to Answer
 
 1. Read the INDEX section first - it lists every article with a one-line summary
 2. Identify 3-10 articles that are relevant to the question
-3. Read those articles carefully (they're included below)
+3. Use the Read tool to fetch those articles (they live at
+   {KNOWLEDGE_DIR}/concepts/, {KNOWLEDGE_DIR}/connections/, and
+   {KNOWLEDGE_DIR}/qa/). Only read articles you actually need — do not
+   read the entire wiki.
 4. Synthesize a clear, thorough answer
 5. Cite your sources using [[wikilinks]] (e.g., [[concepts/supabase-auth]])
 6. If the knowledge base doesn't contain relevant information, say so honestly
 
-## Knowledge Base
+## Knowledge Base Index
 
-{wiki_content}
+{wiki_index}
 
 ## Question
 
@@ -87,6 +102,7 @@ consulting the knowledge base below.
             prompt=prompt,
             options=ClaudeAgentOptions(
                 cwd=str(ROOT_DIR),
+                model=QUERY_MODEL,
                 system_prompt={"type": "preset", "preset": "claude_code"},
                 allowed_tools=tools,
                 permission_mode="acceptEdits",