# Source: chat/chat/web/snapshots.py (Python; 217 lines, 7.7 KiB)
"""Snapshot UX routes (T99).
Surfaces the existing snapshot service (``chat/services/snapshot.py``)
through HTML so the user can see, take, restore, and preview snapshots
without dropping to a shell.
Routes:
* ``GET /snapshots`` list all snapshots (both kinds)
* ``POST /snapshots/take`` take a periodic snapshot now
* ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id`` and ``kind``)
* ``GET /snapshots/{id}/preview`` show metadata + delta vs current
The ``snapshot_id`` is the filename stem (the UTC timestamp written by
:func:`chat.services.snapshot.take_snapshot`) — there's no separate UUID,
and the timestamp filename is already unique per snapshot kind. Ids are
not unique across kinds, though: a periodic and a rewind snapshot may
share the same id, so the restore + preview routes require ``kind`` as a
form/query param to disambiguate (a missing/empty ``kind`` is a 400, not
a silent default).
Note on ``created_at`` mtime drift: the listing's ``created_at`` comes
from the file's mtime, not the encoded filename timestamp. ``cp -p``
preserves mtime, but plain ``cp`` resets it to "now" — so a copied
snapshot can show a misleading ``created_at`` while its filename still
reflects the original UTC capture time.
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from fastapi import APIRouter, Depends, Form, HTTPException, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from chat.services.snapshot import (
restore_from_snapshot,
take_snapshot,
)
from chat.web.bots import get_conn
# Jinja2 template loader pointed at the ``templates`` directory that lives
# inside this file's grandparent directory (``<this file>/../../templates``).
TEMPLATES = Jinja2Templates(
directory=str(Path(__file__).resolve().parent.parent / "templates")
)
# Router that the ``@router.get`` / ``@router.post`` decorators below
# register the snapshot routes on.
router = APIRouter()
# The only accepted values for ``kind``; each one is also the name of the
# ``snapshots/{kind}/`` subdirectory under the data directory.
SNAPSHOT_KINDS = ("periodic", "rewind")
def _list_all_snapshots(data_dir: Path) -> list[dict]:
    """Walk ``data/snapshots/{kind}/`` for both kinds and collect metadata.

    Each returned dict exposes the fields the template needs:
    ``snapshot_id`` (filename stem), ``kind``, ``created_at`` (file mtime
    as a UTC ISO-8601 string), ``file_size_bytes``, ``last_event_id``
    (parsed from the JSON body) and ``filename``. Every file is read and
    parsed in full, which is acceptable for the handful of snapshots kept
    on disk.

    Files whose body cannot be read, decoded, or parsed — or whose JSON is
    not an object — are still listed, with ``last_event_id`` set to
    ``None``, so the user can see and delete them. Files that disappear
    between the directory scan and the ``stat`` call are skipped.

    The result is sorted newest first by ``created_at``.
    """
    rows: list[dict] = []
    for kind in SNAPSHOT_KINDS:
        snap_dir = data_dir / "snapshots" / kind
        if not snap_dir.is_dir():
            continue
        for path in sorted(snap_dir.glob("*.json")):
            try:
                stat = path.stat()
            except OSError:
                # The file vanished (or became inaccessible) after the
                # glob; there is nothing meaningful left to list.
                continue

            try:
                dump = json.loads(path.read_text())
            except (OSError, ValueError):
                # ValueError covers both json.JSONDecodeError and the
                # UnicodeDecodeError raised for non-text bytes.
                dump = None

            # A valid JSON document that is not an object (list, string,
            # number, ...) has no ``last_event_id`` either.
            if isinstance(dump, dict):
                last_event_id = dump.get("last_event_id", 0)
            else:
                last_event_id = None

            rows.append(
                {
                    "snapshot_id": path.stem,
                    "kind": kind,
                    "created_at": datetime.fromtimestamp(
                        stat.st_mtime, tz=timezone.utc
                    ).isoformat(),
                    "file_size_bytes": stat.st_size,
                    "last_event_id": last_event_id,
                    "filename": path.name,
                }
            )
    # Newest first for display.
    rows.sort(key=lambda r: r["created_at"], reverse=True)
    return rows
def _require_kind(kind: str) -> str:
    """Validate ``kind`` and hand it back unchanged.

    Only the values in ``SNAPSHOT_KINDS`` are accepted; a missing, empty,
    or unknown value raises a 400. There is deliberately no fallback to
    ``"periodic"``: a silent default made rewind-snapshot lookups surface
    as confusing 404s, so the client must always name the kind itself.
    """
    if kind and kind in SNAPSHOT_KINDS:
        return kind
    raise HTTPException(
        status_code=400,
        detail=f"kind must be one of {SNAPSHOT_KINDS}",
    )
def _resolve_snapshot_path(
    data_dir: Path, snapshot_id: str, kind: str
) -> Path:
    """Map an ``(id, kind)`` pair to the on-disk file, or 404.

    ``kind`` is validated first (400 on a missing/unknown value). The
    snapshot is then looked up as
    ``data_dir/snapshots/{kind}/{snapshot_id}.json``.

    ``snapshot_id`` arrives straight from the request URL, so it is
    treated as untrusted: an id that contains a path separator or a drive
    prefix for the current platform is rejected with the same 404 as a
    missing file, which keeps the lookup confined to the kind directory.
    Only regular files count as snapshots; a directory that happens to be
    named ``*.json`` is reported as not found.

    Raises:
        HTTPException: 400 for an invalid ``kind``, 404 when no matching
            snapshot file exists.
    """
    _require_kind(kind)
    filename = f"{snapshot_id}.json"
    # ``Path.name`` drops any directory/drive component, so it only equals
    # the full string when the id is a plain single-segment file name.
    if Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="snapshot not found")
    path = data_dir / "snapshots" / kind / filename
    if not path.is_file():
        raise HTTPException(status_code=404, detail="snapshot not found")
    return path
@router.get("/snapshots", response_class=HTMLResponse)
async def snapshots_list(request: Request):
    """Render ``snapshots.html`` with every snapshot of both kinds."""
    data_dir = request.app.state.settings.data_dir
    context = {
        "snapshots": _list_all_snapshots(data_dir),
        "active_nav": "snapshots",
    }
    return TEMPLATES.TemplateResponse(request, "snapshots.html", context)
@router.post("/snapshots/take")
async def snapshots_take(request: Request, conn=Depends(get_conn)):
    """Take a snapshot right now, then redirect back to the listing.

    Manual snapshots are stored with ``kind="periodic"``: they are
    user-initiated checkpoints rather than pre-rewind safety dumps. They
    do count against the 5-snapshot retention, which is acceptable since
    a manual snapshot is the newest one and therefore the last to be
    pruned.
    """
    data_dir = request.app.state.settings.data_dir
    take_snapshot(conn, data_dir=data_dir, kind="periodic")
    # 303 so the browser follows up with a GET of the listing page.
    return RedirectResponse(url="/snapshots", status_code=303)
@router.post("/snapshots/restore/{snapshot_id}")
async def snapshots_restore(
    snapshot_id: str,
    request: Request,
    confirm_id: str = Form(""),
    kind: str = Form(""),
    conn=Depends(get_conn),
):
    """Restore from a snapshot after a hard confirmation.

    The form must echo the snapshot id back in ``confirm_id``; if it does
    not equal the id in the path, the request fails with a 400 before the
    DB is touched. When it matches, :func:`restore_from_snapshot` clears
    the projected tables and re-loads them from the dump.

    ``kind`` is mandatory and must be ``"periodic"`` or ``"rewind"`` — a
    missing or empty value is a 400, never a silent default.
    """
    # Checked before the confirmation so a bad ``kind`` is reported first.
    _require_kind(kind)

    confirmed = confirm_id == snapshot_id
    if not confirmed:
        raise HTTPException(
            status_code=400,
            detail="confirm_id does not match snapshot id",
        )

    data_dir = request.app.state.settings.data_dir
    snapshot_file = _resolve_snapshot_path(data_dir, snapshot_id, kind)
    restore_from_snapshot(conn, snapshot_file)
    return RedirectResponse(url="/snapshots", status_code=303)
@router.get("/snapshots/{snapshot_id}/preview", response_class=HTMLResponse)
async def snapshots_preview(
    snapshot_id: str,
    request: Request,
    kind: str = "",
    conn=Depends(get_conn),
):
    """Show snapshot metadata + a basic delta against the current event log.

    Phase 4 keeps this simple: the snapshot's ``last_event_id`` plus the
    current ``MAX(event_log.id)`` is enough to tell the user how far the
    log has moved on. A richer per-table diff is a Phase 4.5+ concern.

    ``kind`` is required (``"periodic"`` or ``"rewind"``); a missing or
    empty value is a 400 rather than a silent default.

    The listing page also rendered here tolerates corrupt snapshot files,
    so a preview can be requested for one. In that case — unreadable
    file, invalid JSON, a body that is not a JSON object, or a
    non-integer ``last_event_id`` — the route answers with an explicit
    500 carrying a descriptive ``detail`` instead of an unhandled
    exception.

    Raises:
        HTTPException: 400 for an invalid ``kind``, 404 when the snapshot
            file does not exist, 500 when the file cannot be previewed.
    """
    _require_kind(kind)
    settings = request.app.state.settings
    path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind)

    corrupt_detail = "snapshot file is unreadable or not a valid snapshot"
    try:
        dump = json.loads(path.read_text())
        stat = path.stat()
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise HTTPException(status_code=500, detail=corrupt_detail) from exc

    last_event_id = (
        dump.get("last_event_id", 0) if isinstance(dump, dict) else None
    )
    if not isinstance(last_event_id, int):
        # Without an integer id the delta below cannot be computed.
        raise HTTPException(status_code=500, detail=corrupt_detail)

    cur = conn.execute("SELECT MAX(id) FROM event_log")
    row = cur.fetchone()
    # MAX() over an empty table yields NULL; treat that as "no events yet".
    current_max_id = row[0] if row[0] is not None else 0

    return TEMPLATES.TemplateResponse(
        request,
        "snapshots.html",
        {
            "snapshots": _list_all_snapshots(settings.data_dir),
            "active_nav": "snapshots",
            "preview": {
                "snapshot_id": snapshot_id,
                "kind": kind,
                "filename": path.name,
                "file_size_bytes": stat.st_size,
                "last_event_id": last_event_id,
                "current_event_log_max_id": current_max_id,
                "event_delta": current_max_id - last_event_id,
                # ``or []`` also covers an explicit JSON ``null``.
                "event_log_rows_in_snapshot": len(
                    dump.get("event_log") or []
                ),
            },
        },
    )