"""Tests for compile.py chunking + state-on-failure behavior. Covers audit assertions #4 (failed SDK call doesn't mark log as compiled) and the chunking correctness that underpins #5 (bounded prompt size). """ from __future__ import annotations import asyncio from pathlib import Path from typing import Any from unittest.mock import patch import pytest import compile as compile_mod # ── Chunking correctness ──────────────────────────────────────────────── def test_chunk_small_log_single_chunk() -> None: small = "### Section A\nbody\n\n### Section B\nbody\n" chunks = compile_mod._split_log_into_chunks(small, max_chars=100_000) assert chunks == [small] def test_chunk_reconstructs_byte_exact() -> None: sections = [f"### Section {i}\n" + "x" * 40_000 + "\n" for i in range(3)] large = "".join(sections) chunks = compile_mod._split_log_into_chunks(large, max_chars=100_000) assert "".join(chunks) == large def test_chunk_respects_section_boundaries() -> None: # 4 sections of 30k each; max 70k → each chunk should be a clean # boundary cut, never splitting a `### ` header from its body. sections = [f"### S{i}\n" + "y" * 30_000 + "\n" for i in range(4)] large = "".join(sections) chunks = compile_mod._split_log_into_chunks(large, max_chars=70_000) # Every chunk that isn't the first should start with `### `. for chunk in chunks: if chunk != chunks[0]: assert chunk.startswith("### "), f"chunk boundary split mid-section: {chunk[:50]!r}" def test_chunk_oversized_single_section_emits_alone() -> None: # Section larger than max_chars should be emitted as its own oversized # chunk rather than being split mid-thought. huge = "### One big section\n" + "z" * 150_000 + "\n" chunks = compile_mod._split_log_into_chunks(huge, max_chars=100_000) assert len(chunks) == 1 assert chunks[0] == huge def test_chunk_mixed_sizes() -> None: # Section headers must anchor at line-start (regex is `(?m)(?=^### )`), # so each section body must end with a newline. parts = [ "### A\n" + "a" * 30_000 + "\n", "### B\n" + "b" * 30_000 + "\n", "### C\n" + "c" * 30_000 + "\n", ] content = "".join(parts) # ~90K chunks = compile_mod._split_log_into_chunks(content, max_chars=80_000) # First chunk should contain A + B (~60K), second chunk C. assert len(chunks) == 2 assert "### A" in chunks[0] and "### B" in chunks[0] assert "### C" in chunks[1] # Chunk 2 begins at a clean section boundary. assert chunks[1].startswith("### C") # ── State-on-failure (acceptance #4) ──────────────────────────────────── @pytest.mark.asyncio async def test_failed_sdk_does_not_mark_compiled(tmp_path: Path, monkeypatch) -> None: """If the SDK raises, compile_daily_log must NOT update state.json. Re-running should still see the log as uncompiled. """ import config import utils # Redirect state file + paths to tmp. state_file = tmp_path / "state.json" daily_dir = tmp_path / "daily" daily_dir.mkdir() knowledge_dir = tmp_path / "knowledge" knowledge_dir.mkdir() monkeypatch.setattr(utils, "STATE_FILE", state_file) # Make a tiny fake daily log. log_path = daily_dir / "2026-04-24-test.md" log_path.write_text("### Test\nhello\n") # Make AGENTS.md resolvable (compile.py reads it). agents_file = tmp_path / "AGENTS.md" agents_file.write_text("# Test Schema\n") monkeypatch.setattr(config, "AGENTS_FILE", agents_file) monkeypatch.setattr(compile_mod, "AGENTS_FILE", agents_file) # Mock _invoke_llm to always fail. async def failing_invoke(_prompt: str) -> tuple[float, bool]: return (0.0, False) monkeypatch.setattr(compile_mod, "_invoke_llm", failing_invoke) state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0} cost = await compile_mod.compile_daily_log(log_path, state) # Function returns 0.0 cost. assert cost == 0.0 # State was NOT updated with this log's hash. assert log_path.name not in state.get("ingested", {}) # state.json file was NOT written (save_state only runs on full success). assert not state_file.exists() @pytest.mark.asyncio async def test_partial_chunk_failure_does_not_mark_compiled(tmp_path: Path, monkeypatch) -> None: """Multi-chunk log: if chunk 2 fails, log must NOT be marked compiled. Verifies the `all_succeeded` gate holds across chunks. """ import config import utils state_file = tmp_path / "state.json" daily_dir = tmp_path / "daily" daily_dir.mkdir() (tmp_path / "knowledge").mkdir() (tmp_path / "knowledge" / "index.md").write_text("(empty)") monkeypatch.setattr(utils, "STATE_FILE", state_file) # Log large enough to chunk (>100K) via 3 sections. sections = [f"### Section {i}\n" + "x" * 40_000 + "\n" for i in range(3)] log_path = daily_dir / "2026-04-24-big.md" log_path.write_text("".join(sections)) agents_file = tmp_path / "AGENTS.md" agents_file.write_text("# Test Schema\n") monkeypatch.setattr(config, "AGENTS_FILE", agents_file) monkeypatch.setattr(compile_mod, "AGENTS_FILE", agents_file) # Patch read_wiki_index to return a dummy value (avoid hitting the real # knowledge dir). monkeypatch.setattr(compile_mod, "read_wiki_index", lambda: "(test index)") # _invoke_llm: succeed on 1st chunk, fail on 2nd. Track calls. calls = {"n": 0} async def flaky_invoke(_prompt: str) -> tuple[float, bool]: calls["n"] += 1 if calls["n"] == 1: return (0.25, True) # chunk 1 ok return (0.0, False) # chunk 2 fails monkeypatch.setattr(compile_mod, "_invoke_llm", flaky_invoke) state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0} cost = await compile_mod.compile_daily_log(log_path, state) # Returned partial cost (chunk 1 actually cost something). assert cost == 0.25 # But state NOT updated — log is still "uncompiled" from the outside. assert log_path.name not in state.get("ingested", {}) assert not state_file.exists() # Exactly 2 chunk attempts (first success, second failure, then bail). assert calls["n"] == 2 @pytest.mark.asyncio async def test_all_chunks_succeed_updates_state(tmp_path: Path, monkeypatch) -> None: """When every chunk succeeds, state IS updated with the hash.""" import config import utils state_file = tmp_path / "state.json" daily_dir = tmp_path / "daily" daily_dir.mkdir() (tmp_path / "knowledge").mkdir() monkeypatch.setattr(utils, "STATE_FILE", state_file) log_path = daily_dir / "2026-04-24-ok.md" log_path.write_text("### Section\nhi\n") agents_file = tmp_path / "AGENTS.md" agents_file.write_text("# Schema\n") monkeypatch.setattr(config, "AGENTS_FILE", agents_file) monkeypatch.setattr(compile_mod, "AGENTS_FILE", agents_file) monkeypatch.setattr(compile_mod, "read_wiki_index", lambda: "(index)") async def good_invoke(_prompt: str) -> tuple[float, bool]: return (0.10, True) monkeypatch.setattr(compile_mod, "_invoke_llm", good_invoke) state: dict[str, Any] = {"ingested": {}, "total_cost": 0.0} cost = await compile_mod.compile_daily_log(log_path, state) assert cost == 0.10 assert log_path.name in state["ingested"] assert state["ingested"][log_path.name]["cost_usd"] == 0.10 assert state["total_cost"] == 0.10 # state.json now exists on disk. assert state_file.exists()