# memoria/scripts/flush.py

"""
Memory flush agent - extracts important knowledge from conversation context.

Spawned by session-end.py or pre-compact.py as a background process. Reads
pre-extracted conversation context from a .md file, uses the Claude Agent SDK
to decide what's worth saving, and appends the result to today's daily log.

Usage:
    uv run python flush.py <context_file.md> <session_id>
"""

from __future__ import annotations

# Recursion prevention: set this BEFORE any imports that might trigger Claude
import os
os.environ["CLAUDE_INVOKED_BY"] = "memory_flush"

import asyncio
import json
import logging
import sys
import time
from datetime import datetime
from pathlib import Path

# ROOT is the parent of the directory that contains this file; SCRIPTS_DIR is
# ROOT's "scripts" subdirectory.
ROOT = Path(__file__).resolve().parent.parent
SCRIPTS_DIR = ROOT / "scripts"
# Put SCRIPTS_DIR first on sys.path before the imports that follow
# (presumably config.py and fs_utils.py live in that directory — confirm).
sys.path.insert(0, str(SCRIPTS_DIR))

from config import DAILY_DIR, _now_local  # noqa: E402
from fs_utils import atomic_write_text, load_json_with_recovery, locked_append_text  # noqa: E402

# Dedup state file: written by save_flush_state(), read by load_flush_state().
STATE_FILE = SCRIPTS_DIR / "last-flush.json"
# Target file of the module's file-based logging configuration.
LOG_FILE = SCRIPTS_DIR / "flush.log"

# Staleness-based auto-compile gate (replaces the upstream 6 PM wall-clock gate).
# Compile fires when the daily log has changed AND enough time has passed since
# the last compile of that log. Configurable via env; default is 1 hour.
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset or is not a valid
    integer literal, so a typo in the environment cannot kill this background
    process at import time (where nothing would be recorded, because file
    logging is not configured yet and stdout/stderr are discarded).
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # Use a named logger, NOT the module-level logging.warning(): the
        # latter implicitly calls basicConfig() when the root logger has no
        # handlers, which would turn the file-based basicConfig() call later
        # in this module into a no-op.
        logging.getLogger(__name__).warning(
            "Ignoring invalid %s=%r; using default %d", name, raw, default
        )
        return default


COMPILE_INTERVAL_MIN = _env_int("MEMORIA_COMPILE_INTERVAL_MIN", 60)

# SDK retry tuning.
# MAX_SDK_ATTEMPTS is the total number of query attempts run_flush() makes.
# After failed attempt k (when another attempt remains) run_flush() sleeps
# SDK_BACKOFF_BASE_SEC * 2**(k-1) seconds — 2 s, then 4 s with these values.
MAX_SDK_ATTEMPTS = 3
SDK_BACKOFF_BASE_SEC = 2.0

# Set up file-based logging so we can verify the background process ran.
# The parent process sends stdout/stderr to DEVNULL (to avoid the inherited
# file handle bug on Windows), so this is our only observability channel.
# This configures the root logger; the rest of the module logs through the
# module-level logging.info/warning/error functions.
logging.basicConfig(
    filename=str(LOG_FILE),
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)


def load_flush_state() -> dict:
    """Load the dedup state from STATE_FILE, always returning a dict.

    Reading (and any corruption recovery) is delegated to
    ``load_json_with_recovery`` with ``{}`` as the fallback value. A file that
    parses as valid JSON but is not an object (e.g. ``[]`` or ``null``) is
    treated as empty state, so callers can rely on ``.get()`` being available.

    Returns:
        The stored state mapping, or ``{}`` when nothing usable is stored.
    """
    state = load_json_with_recovery(STATE_FILE, {}, logger=logging.getLogger())
    if not isinstance(state, dict):
        logging.warning(
            "Ignoring non-dict flush state in %s (got %s)",
            STATE_FILE,
            type(state).__name__,
        )
        return {}
    return state


def save_flush_state(state: dict) -> None:
    """Write *state* to STATE_FILE as JSON.

    The state is serialized with ``json.dumps`` and handed to
    ``atomic_write_text``, which is responsible for the atomicity of the
    on-disk replacement.
    """
    serialized = json.dumps(state)
    atomic_write_text(STATE_FILE, serialized)


def append_to_daily_log(content: str, section: str = "Session") -> None:
    """Append an entry to today's daily log via ``locked_append_text``.

    The entry is ``### <section> (HH:MM)`` followed by *content*. When the
    log file does not exist yet, the daily header is prepended to the entry
    and both are written in the same append call.

    The header is never written with a replace-style write: if two concurrent
    flushes (SessionEnd + PreCompact firing within seconds of each other) both
    see the file as missing, a replacing header write by the slower one would
    discard the entry the faster one had already appended. With append-only
    writes that race produces a duplicate header at worst, never a lost entry.

    Args:
        content: Body text of the entry.
        section: Heading label for the entry.
    """
    today = _now_local()
    date_str = today.strftime("%Y-%m-%d")
    log_path = DAILY_DIR / f"{date_str}.md"

    time_str = today.strftime("%H:%M")
    entry = f"### {section} ({time_str})\n\n{content}\n\n"

    if not log_path.exists():
        DAILY_DIR.mkdir(parents=True, exist_ok=True)
        header = f"# Daily Log: {date_str}\n\n## Sessions\n\n## Memory Maintenance\n\n"
        entry = header + entry

    # NOTE(review): relies on locked_append_text creating the file when it is
    # absent (normal append-mode behaviour) — confirm in fs_utils.
    locked_append_text(log_path, entry)


async def run_flush(context: str) -> str:
    """Use Claude Agent SDK to extract important knowledge from conversation context.

    Sends a fixed summarization prompt with *context* appended and collects
    the text blocks of every assistant message into one string.

    An attempt counts as failed when the SDK raises, or when it finishes
    without producing any non-whitespace text (an empty "summary" must not be
    reported as success, or the caller would log a blank entry and discard the
    context). Failed attempts are retried up to MAX_SDK_ATTEMPTS times with
    exponential backoff (SDK_BACKOFF_BASE_SEC * 2**(attempt-1) seconds).

    Args:
        context: Pre-extracted conversation context to summarize.

    Returns:
        The LLM's response text on success, or a `FLUSH_ERROR: ...` sentinel
        string on final failure — the caller treats that as "retry later" and
        preserves the source context file rather than deleting it.
    """
    import traceback

    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        ResultMessage,
        TextBlock,
        query,
    )

    prompt = f"""Review the conversation context below and respond with a concise summary
of important items that should be preserved in the daily log.
Do NOT use any tools — just return plain text.

Format your response as a structured daily log entry with these sections:

**Context:** [One line about what the user was working on]

**Key Exchanges:**
- [Important Q&A or discussions]

**Decisions Made:**
- [Any decisions with rationale]

**Lessons Learned:**
- [Gotchas, patterns, or insights discovered]

**Action Items:**
- [Follow-ups or TODOs mentioned]

Skip anything that is:
- Routine tool calls or file reads
- Content that's trivial or obvious
- Trivial back-and-forth or clarification exchanges

Only include sections that have actual content. If nothing is worth saving,
respond with exactly: FLUSH_OK

## Conversation Context

{context}"""

    last_exc: Exception | None = None
    for attempt in range(1, MAX_SDK_ATTEMPTS + 1):
        chunks: list[str] = []
        try:
            async for message in query(
                prompt=prompt,
                options=ClaudeAgentOptions(
                    cwd=str(ROOT),
                    model="haiku",  # flush is short-form summarization — haiku is plenty
                    allowed_tools=[],
                    max_turns=2,
                ),
            ):
                if isinstance(message, AssistantMessage):
                    chunks.extend(
                        block.text
                        for block in message.content
                        if isinstance(block, TextBlock)
                    )
                elif isinstance(message, ResultMessage) and getattr(
                    message, "is_error", False
                ):
                    # NOTE(review): assumes ResultMessage exposes `is_error` /
                    # `subtype`; getattr keeps this safe if it does not.
                    logging.warning(
                        "Agent SDK reported an error result on attempt %d/%d (subtype=%s)",
                        attempt,
                        MAX_SDK_ATTEMPTS,
                        getattr(message, "subtype", None),
                    )

            response = "".join(chunks)
            if not response.strip():
                # Route "no text at all" through the same retry path as an
                # SDK exception instead of returning "" as a success.
                raise RuntimeError("Agent SDK returned no text")
            return response
        except Exception as exc:
            last_exc = exc
            logging.warning(
                "Agent SDK error on attempt %d/%d: %s",
                attempt,
                MAX_SDK_ATTEMPTS,
                exc,
            )
            if attempt < MAX_SDK_ATTEMPTS:
                delay = SDK_BACKOFF_BASE_SEC * (2 ** (attempt - 1))
                await asyncio.sleep(delay)
            else:
                logging.error("Agent SDK failed all retries:\n%s", traceback.format_exc())

    # All retries exhausted. Return the error sentinel; caller preserves
    # the context file for a later retry. last_exc is only None if the loop
    # body never ran (MAX_SDK_ATTEMPTS < 1).
    if last_exc is None:
        return "FLUSH_ERROR: no SDK attempt was made"
    return f"FLUSH_ERROR: {type(last_exc).__name__}: {last_exc}"


def _compile_is_due(now: datetime, today_log: str, log_path: Path) -> bool:
    """Return True when the staleness gate allows compiling *today_log* now.

    The gate reads ``state.json`` in SCRIPTS_DIR and looks up
    ``state["ingested"][today_log]``. Compilation is due when that file is
    absent, when no usable entry exists, or when the log's content hash
    differs from the recorded one AND at least COMPILE_INTERVAL_MIN minutes
    have elapsed since the recorded ``compiled_at``. Unexpected shapes in the
    state file are treated as "no entry" rather than raising.
    """
    from hashlib import sha256

    compile_state_file = SCRIPTS_DIR / "state.json"
    if not compile_state_file.exists():
        return True  # no compile state at all: nothing to gate on

    compile_state = load_json_with_recovery(
        compile_state_file, {}, logger=logging.getLogger()
    )
    # Validate every level before calling .get(): the file may hold valid
    # JSON of the wrong shape, and that must not crash the flush.
    ingested = compile_state.get("ingested", {}) if isinstance(compile_state, dict) else {}
    entry = ingested.get(today_log, {}) if isinstance(ingested, dict) else {}
    if not isinstance(entry, dict):
        entry = {}

    # Compile only if content actually changed.
    current_hash = sha256(log_path.read_bytes()).hexdigest()[:16]
    if entry.get("hash") == current_hash:
        return False  # log unchanged since last compile

    # Rate-limit: require COMPILE_INTERVAL_MIN elapsed since last compile.
    compiled_at_str = entry.get("compiled_at")
    if not compiled_at_str:
        return True  # never compiled (or no timestamp recorded)

    try:
        compiled_at = datetime.fromisoformat(compiled_at_str)
        # A naive timestamp is interpreted in `now`'s timezone so the
        # subtraction below does not mix naive and aware datetimes.
        if compiled_at.tzinfo is None:
            compiled_at = compiled_at.replace(tzinfo=now.tzinfo)
        elapsed_min = (now - compiled_at).total_seconds() / 60.0
    except (ValueError, TypeError) as e:
        logging.warning("Could not parse compiled_at %r: %s", compiled_at_str, e)
        # Proceed with compile; a malformed timestamp should not block progress.
        return True

    if elapsed_min < COMPILE_INTERVAL_MIN:
        logging.info(
            "Skip compile: %s changed but only %.1f min since last compile (threshold %d)",
            today_log,
            elapsed_min,
            COMPILE_INTERVAL_MIN,
        )
        return False
    return True


def maybe_trigger_compilation() -> None:
    """Spawn compile.py if today's log has changed and compile-interval has elapsed.

    Replaces the upstream 6 PM wall-clock gate with a staleness-based trigger:
    compile fires if the daily log's content hash differs from the last-compiled
    hash AND at least COMPILE_INTERVAL_MIN minutes have passed since the last
    compile of that log (or it has never been compiled).

    No-op if today's log or compile.py isn't found. A failure to spawn the
    child process is logged and swallowed. The child is started detached
    (new session on POSIX, detached process group on Windows) with its
    stdout/stderr appended to ``compile.log`` in SCRIPTS_DIR; this function
    does not wait for it.
    """
    import subprocess as _sp

    now = _now_local()
    today_log = f"{now.strftime('%Y-%m-%d')}.md"
    log_path = DAILY_DIR / today_log
    if not log_path.exists():
        return

    if not _compile_is_due(now, today_log, log_path):
        return

    compile_script = SCRIPTS_DIR / "compile.py"
    if not compile_script.exists():
        return

    logging.info("Compilation triggered for %s (staleness gate)", today_log)

    cmd = ["uv", "run", "--directory", str(ROOT), "python", str(compile_script)]

    kwargs: dict = {}
    if sys.platform == "win32":
        kwargs["creationflags"] = _sp.CREATE_NEW_PROCESS_GROUP | _sp.DETACHED_PROCESS
    else:
        kwargs["start_new_session"] = True

    # Use a context manager so the log handle is always cleaned up, even on
    # Popen failure. Popen inherits the fd via duplication; closing our copy
    # is safe and cleaner than leaking the handle.
    try:
        with open(str(SCRIPTS_DIR / "compile.log"), "a", encoding="utf-8") as log_handle:
            _sp.Popen(cmd, stdout=log_handle, stderr=_sp.STDOUT, cwd=str(ROOT), **kwargs)
    except Exception as e:
        logging.error("Failed to spawn compile.py: %s", e)


def _is_flush_ok(response: str) -> bool:
    """Return True when *response* is just the FLUSH_OK sentinel.

    Surrounding whitespace and simple wrapping punctuation/markdown
    (backticks, asterisks, quotes, a trailing period) are tolerated, but a
    longer summary that merely mentions FLUSH_OK is NOT treated as the
    sentinel — otherwise real content would be discarded.
    """
    return response.strip(" \t\r\n`*\"'.") == "FLUSH_OK"


def _flush_session(context_file: Path, session_id: str) -> None:
    """Run one flush: dedup check, LLM extraction, log append, cleanup."""
    logging.info("flush.py started for session %s, context: %s", session_id, context_file)

    if not context_file.exists():
        logging.error("Context file not found: %s", context_file)
        return

    # Deduplication: skip if same session was flushed within 60 seconds.
    # Tolerate a malformed state file (wrong container or timestamp type)
    # by treating it as "no previous flush".
    state = load_flush_state()
    if not isinstance(state, dict):
        state = {}
    last_ts = state.get("timestamp", 0)
    if not isinstance(last_ts, (int, float)):
        last_ts = 0
    if state.get("session_id") == session_id and time.time() - last_ts < 60:
        logging.info("Skipping duplicate flush for session %s", session_id)
        context_file.unlink(missing_ok=True)
        return

    # Read pre-extracted context
    context = context_file.read_text(encoding="utf-8").strip()
    if not context:
        logging.info("Context file is empty, skipping")
        context_file.unlink(missing_ok=True)
        return

    logging.info("Flushing session %s: %d chars", session_id, len(context))

    # Run the LLM extraction (with built-in retry). run_flush returns either
    # the structured summary, the literal string "FLUSH_OK" for skip-cases,
    # or a "FLUSH_ERROR: ..." sentinel indicating all SDK retries failed.
    response = asyncio.run(run_flush(context))

    # On permanent SDK failure (or a blank response, which carries nothing to
    # save): preserve the context file for manual retry, do NOT update dedup
    # state (so next flush for this session can retry), and do NOT trigger
    # compilation (there's nothing new to compile).
    if response.startswith("FLUSH_ERROR") or not response.strip():
        logging.error(
            "Flush failed for session %s after retries. Context preserved at %s. Response: %s",
            session_id,
            context_file,
            response,
        )
        return

    # Append the summary to today's daily log.
    if _is_flush_ok(response):
        logging.info("Result: FLUSH_OK")
        append_to_daily_log(
            "FLUSH_OK - Nothing worth saving from this session", "Memory Flush"
        )
    else:
        logging.info("Result: saved to daily log (%d chars)", len(response))
        append_to_daily_log(response, "Session")

    # Update dedup state (atomic) so a duplicate flush within 60s is suppressed.
    save_flush_state({"session_id": session_id, "timestamp": time.time()})

    # Clean up context file — only on successful append.
    context_file.unlink(missing_ok=True)

    # Staleness-triggered auto-compilation: fires if today's log has changed
    # and the compile-interval has elapsed. See maybe_trigger_compilation.
    maybe_trigger_compilation()

    logging.info("Flush complete for session %s", session_id)


def main() -> None:
    """Entry point: ``flush.py <context_file.md> <session_id>``.

    Exits with status 1 when arguments are missing. Otherwise runs the flush
    for the given context file and session. Any unexpected exception is
    written to the log file with its traceback before being re-raised,
    because this process's stdout/stderr are discarded by the parent and the
    log file is the only place a crash can be observed.
    """
    if len(sys.argv) < 3:
        logging.error("Usage: %s <context_file.md> <session_id>", sys.argv[0])
        sys.exit(1)

    context_file = Path(sys.argv[1])
    session_id = sys.argv[2]

    try:
        _flush_session(context_file, session_id)
    except Exception:
        logging.exception("flush.py crashed for session %s", session_id)
        raise


# Run the flush only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()