The acceptance test suite under tests/ directly covers 8 of the 10
audit-defined assertions. The other 2 require integration-level
fixtures (flush-subprocess-survives-hook-exit and the
whole-wiki-not-in-prompt token count) and are documented as manual-test
checks rather than automated.
tests/test_fs_utils.py — 17 tests
* Atomic write: roundtrip, overwrite, original-preserved-on-exception,
parent-dir-creation.
* Locked append: 4 concurrent workers × 25 entries each, asserts every
entry appears exactly once and its body lines are contiguous. This
is the acceptance criterion for "two concurrent flushes don't
interleave writes."
* JSON recovery: clean roundtrip, missing-file default, corruption
produces timestamped .bak and returns default.
* Wikilink parsing: bare / aliased / mixed; parse_wikilink strip.
* Path safety: clean / traversal / absolute / empty / null-byte /
aliased-but-safe.
tests/test_compile_chunking.py — 8 tests
* Chunking: small log passthrough, byte-exact reconstruction,
boundary respect, oversized-single-section, mixed-size packing.
* State-on-failure: single-chunk SDK error does NOT update state;
multi-chunk partial failure does NOT update state; all-chunks
succeed DOES update state with hash + cost.
tests/test_lint_backlinks.py — 4 tests
* Aliased wikilinks aren't flagged as broken links.
* Aliased backlinks count as valid inbound references (the C9 fix).
* QA articles referencing concepts don't trigger backlink suggestions.
* Concept-to-concept asymmetry IS still reported (C9 scope is narrow).
FORK.md — fork-specific docs:
* Summary of delta vs upstream (data-integrity, scaling, correctness,
safety, configurability, hygiene categories)
* Full env-var reference
* Test invocation + coverage summary
* Upstream sync guidance (cherry-pick, don't blind-pull)
Result: 29 passed in 0.07s. Every patch in this fork was verified by
automated tests before any production use.
209 lines
7.6 KiB
Python
209 lines
7.6 KiB
Python
"""Tests for compile.py chunking + state-on-failure behavior.
|
|
|
|
Covers audit assertions #4 (failed SDK call doesn't mark log as compiled)
|
|
and the chunking correctness that underpins #5 (bounded prompt size).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
import compile as compile_mod
|
|
|
|
|
|
# ── Chunking correctness ────────────────────────────────────────────────
|
|
|
|
def test_chunk_small_log_single_chunk() -> None:
    """A log well under ``max_chars`` comes back as one unmodified chunk."""
    tiny_log = "### Section A\nbody\n\n### Section B\nbody\n"
    result = compile_mod._split_log_into_chunks(tiny_log, max_chars=100_000)
    # Passthrough: the only chunk is the input itself.
    assert result == [tiny_log]
|
|
|
|
|
|
def test_chunk_reconstructs_byte_exact() -> None:
    """Concatenating the chunks of an over-limit log must reproduce it exactly.

    The input is three sections of roughly 40K characters each (about 120K
    in total) checked against a 100K limit.
    """
    sections = [f"### Section {i}\n" + "x" * 40_000 + "\n" for i in range(3)]
    large = "".join(sections)
    chunks = compile_mod._split_log_into_chunks(large, max_chars=100_000)
    # Guard against a vacuous pass: a splitter that never splits would
    # trivially satisfy the round-trip assertion below.
    assert len(chunks) > 1, "expected the over-limit log to be split"
    assert "".join(chunks) == large
|
|
|
|
|
|
def test_chunk_respects_section_boundaries() -> None:
    """Chunk cuts must land on a `### ` header, never inside a section."""
    # 4 sections of ~30K each against a 70K limit: no single section is
    # oversized, but the whole log (~120K) cannot fit in one chunk.
    sections = [f"### S{i}\n" + "y" * 30_000 + "\n" for i in range(4)]
    large = "".join(sections)
    chunks = compile_mod._split_log_into_chunks(large, max_chars=70_000)
    # Guard against a vacuous pass: with a single chunk the loop below
    # would check nothing.
    assert len(chunks) > 1, "expected the over-limit log to be split"
    # Select by position, not by value: comparing each chunk against
    # chunks[0] would silently skip a later chunk whose text equals the first.
    for chunk in chunks[1:]:
        assert chunk.startswith("### "), f"chunk boundary split mid-section: {chunk[:50]!r}"
|
|
|
|
|
|
def test_chunk_oversized_single_section_emits_alone() -> None:
    """A lone section bigger than ``max_chars`` is emitted whole.

    The splitter is expected to hand back the oversized section as its own
    chunk instead of cutting it in the middle.
    """
    oversized = "### One big section\n" + "z" * 150_000 + "\n"
    result = compile_mod._split_log_into_chunks(oversized, max_chars=100_000)
    # Exactly one chunk, and it is the untouched input.
    assert list(result) == [oversized]
|
|
|
|
|
|
def test_chunk_mixed_sizes() -> None:
    """Three ~30K sections under an 80K limit pack as [A + B] then [C]."""
    # Each body ends with a newline so the next `### ` header sits at the
    # start of a line (the splitter regex is `(?m)(?=^### )`).
    content = "".join(
        f"### {tag}\n" + tag.lower() * 30_000 + "\n" for tag in ("A", "B", "C")
    )  # ~90K
    result = compile_mod._split_log_into_chunks(content, max_chars=80_000)

    assert len(result) == 2
    head, tail = result[0], result[1]
    # A and B (~60K) share the first chunk; C spills into the second.
    assert "### A" in head and "### B" in head
    assert "### C" in tail
    # The second chunk opens on a clean section boundary.
    assert tail.startswith("### C")
|
|
|
|
|
|
# ── State-on-failure (acceptance #4) ────────────────────────────────────
|
|
|
|
@pytest.mark.asyncio
async def test_failed_sdk_does_not_mark_compiled(tmp_path: Path, monkeypatch) -> None:
    """If the LLM call reports failure, compile_daily_log must NOT update state.

    The stubbed ``_invoke_llm`` returns ``(0.0, False)`` rather than raising.
    Re-running should still see the log as uncompiled.
    """
    import config
    import utils

    # Redirect state file + paths to tmp.
    state_file = tmp_path / "state.json"
    daily_dir = tmp_path / "daily"
    daily_dir.mkdir()
    (tmp_path / "knowledge").mkdir()

    monkeypatch.setattr(utils, "STATE_FILE", state_file)

    # Make a tiny fake daily log.
    log_path = daily_dir / "2026-04-24-test.md"
    log_path.write_text("### Test\nhello\n")

    # Make AGENTS.md resolvable (compile.py reads it).
    agents_file = tmp_path / "AGENTS.md"
    agents_file.write_text("# Test Schema\n")
    monkeypatch.setattr(config, "AGENTS_FILE", agents_file)
    monkeypatch.setattr(compile_mod, "AGENTS_FILE", agents_file)

    # Stub the wiki index so the test never touches the real knowledge dir.
    monkeypatch.setattr(compile_mod, "read_wiki_index", lambda: "(test index)")

    # Mock _invoke_llm to always report failure.
    async def failing_invoke(_prompt: str) -> tuple[float, bool]:
        return (0.0, False)

    monkeypatch.setattr(compile_mod, "_invoke_llm", failing_invoke)

    state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0}
    cost = await compile_mod.compile_daily_log(log_path, state)

    # Function returns 0.0 cost.
    assert cost == 0.0
    # State was NOT updated with this log's hash.
    assert log_path.name not in state.get("ingested", {})
    # state.json file was NOT written (save_state only runs on full success).
    assert not state_file.exists()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_partial_chunk_failure_does_not_mark_compiled(tmp_path: Path, monkeypatch) -> None:
    """A multi-chunk log whose second chunk fails must stay uncompiled.

    Exercises the `all_succeeded` gate across chunk boundaries.
    """
    import config
    import utils

    # Sandbox every path the compiler touches inside tmp_path.
    state_path = tmp_path / "state.json"
    monkeypatch.setattr(utils, "STATE_FILE", state_path)

    logs_dir = tmp_path / "daily"
    logs_dir.mkdir()
    wiki_dir = tmp_path / "knowledge"
    wiki_dir.mkdir()
    (wiki_dir / "index.md").write_text("(empty)")

    # Three ~40K sections push the log past 100K so it has to be chunked.
    big_log = logs_dir / "2026-04-24-big.md"
    big_log.write_text(
        "".join(f"### Section {i}\n" + "x" * 40_000 + "\n" for i in range(3))
    )

    schema_path = tmp_path / "AGENTS.md"
    schema_path.write_text("# Test Schema\n")
    for module in (config, compile_mod):
        monkeypatch.setattr(module, "AGENTS_FILE", schema_path)

    # Dummy index so the real knowledge dir is never read.
    monkeypatch.setattr(compile_mod, "read_wiki_index", lambda: "(test index)")

    # Record each LLM attempt; only the very first one succeeds.
    attempts: list[int] = []

    async def flaky_invoke(_prompt: str) -> tuple[float, bool]:
        attempts.append(len(attempts) + 1)
        if len(attempts) == 1:
            return (0.25, True)  # chunk 1 ok
        return (0.0, False)  # chunk 2 fails

    monkeypatch.setattr(compile_mod, "_invoke_llm", flaky_invoke)

    state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0}
    spent = await compile_mod.compile_daily_log(big_log, state)

    # The cost of the successful first chunk is still reported.
    assert spent == 0.25
    # The log itself remains "uncompiled" as far as state is concerned.
    assert big_log.name not in state.get("ingested", {})
    assert not state_path.exists()
    # One success, one failure, then the compiler stops trying.
    assert len(attempts) == 2
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_all_chunks_succeed_updates_state(tmp_path: Path, monkeypatch) -> None:
    """A fully successful compile records the log in state and on disk."""
    import config
    import utils

    # Sandbox every path the compiler touches inside tmp_path.
    state_path = tmp_path / "state.json"
    monkeypatch.setattr(utils, "STATE_FILE", state_path)

    logs_dir = tmp_path / "daily"
    logs_dir.mkdir()
    (tmp_path / "knowledge").mkdir()

    small_log = logs_dir / "2026-04-24-ok.md"
    small_log.write_text("### Section\nhi\n")

    schema_path = tmp_path / "AGENTS.md"
    schema_path.write_text("# Schema\n")
    for module in (config, compile_mod):
        monkeypatch.setattr(module, "AGENTS_FILE", schema_path)
    monkeypatch.setattr(compile_mod, "read_wiki_index", lambda: "(index)")

    # Every LLM call succeeds at a fixed cost.
    async def good_invoke(_prompt: str) -> tuple[float, bool]:
        return (0.10, True)

    monkeypatch.setattr(compile_mod, "_invoke_llm", good_invoke)

    state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0}
    spent = await compile_mod.compile_daily_log(small_log, state)

    assert spent == 0.10
    # The log is recorded under its file name, with its cost.
    assert small_log.name in state["ingested"]
    entry = state["ingested"][small_log.name]
    assert entry["cost_usd"] == 0.10
    assert state["total_cost"] == 0.10
    # The state file has been persisted.
    assert state_path.exists()
|