133 lines
4.6 KiB
Python
133 lines
4.6 KiB
Python
"""Shared utilities for the personal knowledge base."""
|
|
|
|
import hashlib
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from config import (
|
|
CONCEPTS_DIR,
|
|
CONNECTIONS_DIR,
|
|
DAILY_DIR,
|
|
INDEX_FILE,
|
|
KNOWLEDGE_DIR,
|
|
LOG_FILE,
|
|
QA_DIR,
|
|
STATE_FILE,
|
|
)
|
|
|
|
|
|
# ── State management ──────────────────────────────────────────────────
|
|
|
|
def load_state() -> dict:
    """Return the persisted state, or a fresh default state if none is saved.

    When ``STATE_FILE`` exists it is read as UTF-8 and parsed as JSON.
    Otherwise a new dict is returned with an empty ``ingested`` mapping,
    ``query_count`` of 0, ``last_lint`` of ``None`` and ``total_cost`` of 0.0.
    """
    if not STATE_FILE.exists():
        # Nothing has been saved yet: hand back the initial state.
        return {"ingested": {}, "query_count": 0, "last_lint": None, "total_cost": 0.0}
    raw = STATE_FILE.read_text(encoding="utf-8")
    return json.loads(raw)
|
|
|
|
|
|
def save_state(state: dict) -> None:
    """Serialize *state* as 2-space-indented JSON and write it to ``STATE_FILE``."""
    serialized = json.dumps(state, indent=2)
    STATE_FILE.write_text(serialized, encoding="utf-8")
|
|
|
|
|
|
# ── File hashing ──────────────────────────────────────────────────────
|
|
|
|
def file_hash(path: Path) -> str:
    """Return the first 16 hex characters of the file's SHA-256 digest."""
    hasher = hashlib.sha256()
    hasher.update(path.read_bytes())
    full_digest = hasher.hexdigest()
    return full_digest[:16]
|
|
|
|
|
|
# ── Slug / naming ─────────────────────────────────────────────────────
|
|
|
|
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated slug.

    The text is lowercased and trimmed, then rewritten in three passes, and
    finally any leading or trailing hyphens are removed.
    """
    rewrites = (
        (r"[^\w\s-]", ""),  # drop anything that is not a word char, whitespace or hyphen
        (r"[\s_]+", "-"),  # runs of whitespace/underscores become a hyphen
        (r"-+", "-"),  # collapse repeated hyphens
    )
    slug = text.lower().strip()
    for pattern, replacement in rewrites:
        slug = re.sub(pattern, replacement, slug)
    return slug.strip("-")
|
|
|
|
|
|
# ── Wikilink helpers ──────────────────────────────────────────────────
|
|
|
|
def extract_wikilinks(content: str) -> list[str]:
    """Return the inner text of every ``[[...]]`` link in *content*, in order.

    The text between the brackets is returned as written, so anything other
    than ``]`` that appears inside the link is kept.
    """
    link_pattern = re.compile(r"\[\[([^\]]+)\]\]")
    return [match.group(1) for match in link_pattern.finditer(content)]
|
|
|
|
|
|
def wiki_article_exists(link: str) -> bool:
    """Report whether ``<link>.md`` is present under ``KNOWLEDGE_DIR``."""
    return (KNOWLEDGE_DIR / f"{link}.md").exists()
|
|
|
|
|
|
# ── Wiki content helpers ──────────────────────────────────────────────
|
|
|
|
def read_wiki_index() -> str:
    """Return the text of ``INDEX_FILE``, or an empty index table if it is missing."""
    if not INDEX_FILE.exists():
        # No index on disk: return only the title and the table header rows.
        return "# Knowledge Base Index\n\n| Article | Summary | Compiled From | Updated |\n|---------|---------|---------------|---------|"
    return INDEX_FILE.read_text(encoding="utf-8")
|
|
|
|
|
|
def read_all_wiki_content() -> str:
    """Join the index and every wiki article into one string for context.

    The index comes first under an ``## INDEX`` heading. Each ``*.md`` file in
    the concepts, connections and Q&A directories follows, sorted within its
    directory, under a heading showing its path relative to ``KNOWLEDGE_DIR``.
    Directories that do not exist are skipped. Sections are separated by a
    ``---`` rule.
    """
    sections = [f"## INDEX\n\n{read_wiki_index()}"]
    article_dirs = (CONCEPTS_DIR, CONNECTIONS_DIR, QA_DIR)

    for directory in (d for d in article_dirs if d.exists()):
        for article in sorted(directory.glob("*.md")):
            rel = article.relative_to(KNOWLEDGE_DIR)
            content = article.read_text(encoding="utf-8")
            sections.append(f"## {rel}\n\n{content}")

    return "\n\n---\n\n".join(sections)
|
|
|
|
|
|
def list_wiki_articles() -> list[Path]:
    """Return every ``*.md`` file in the concepts, connections and Q&A directories.

    Directories are visited in that order and missing ones are skipped; files
    are sorted within each directory.
    """
    return [
        article
        for subdir in (CONCEPTS_DIR, CONNECTIONS_DIR, QA_DIR)
        if subdir.exists()
        for article in sorted(subdir.glob("*.md"))
    ]
|
|
|
|
|
|
def list_raw_files() -> list[Path]:
    """Return the sorted ``*.md`` files in ``DAILY_DIR``, or ``[]`` if it is missing."""
    return sorted(DAILY_DIR.glob("*.md")) if DAILY_DIR.exists() else []
|
|
|
|
|
|
# ── Index helpers ─────────────────────────────────────────────────────
|
|
|
|
def count_inbound_links(target: str, exclude_file: Path | None = None) -> int:
    """Count the wiki articles whose text contains ``[[target]]``.

    Each article counts at most once, however many times it links to the
    target. The article equal to *exclude_file*, if given, is not read.
    """
    needle = f"[[{target}]]"
    return sum(
        1
        for article in list_wiki_articles()
        if article != exclude_file and needle in article.read_text(encoding="utf-8")
    )
|
|
|
|
|
|
def get_article_word_count(path: Path) -> int:
    """Count whitespace-separated words in an article, excluding YAML frontmatter.

    Frontmatter is recognized only when the first line of the file is a bare
    ``---`` and a later line is also a bare ``---`` (trailing whitespace is
    ignored). Matching whole lines, rather than searching for the substring,
    keeps a ``---`` inside a frontmatter value from being taken as the closing
    delimiter, which would leak the remaining metadata into the count.

    Args:
        path: Article file to read; it is decoded as UTF-8.

    Returns:
        The number of words after the frontmatter. If there is no frontmatter,
        or its closing delimiter is never found, the whole file is counted.
    """
    text = path.read_text(encoding="utf-8")
    lines = text.splitlines(keepends=True)

    if lines and lines[0].rstrip() == "---":
        for index, line in enumerate(lines[1:], start=1):
            if line.rstrip() == "---":
                # Keep only what follows the closing delimiter line.
                text = "".join(lines[index + 1:])
                break

    return len(text.split())
|
|
|
|
|
|
def build_index_entry(rel_path: str, summary: str, sources: str, updated: str) -> str:
    """Build a single index table row.

    Args:
        rel_path: Article path used as the wikilink target; a trailing
            ``.md`` extension is dropped.
        summary: Text for the summary column.
        sources: Text for the "compiled from" column.
        updated: Text for the "updated" column.

    Returns:
        One markdown table row of the form
        ``| [[link]] | summary | sources | updated |``.
    """
    # Strip only the trailing extension: str.replace would also remove a
    # ".md" that appears earlier in the path and corrupt the link target.
    link = rel_path.removesuffix(".md")
    return f"| [[{link}]] | {summary} | {sources} | {updated} |"
|