chore: snapshots.py polish — hoisted imports + strict kind + mtime doc (T105)

This commit is contained in:
Joseph Doherty
2026-04-27 04:47:14 -04:00
parent a06f90a164
commit 64c9ca634a
2 changed files with 57 additions and 9 deletions
+35 -9
View File
@@ -8,20 +8,27 @@ Routes:
* ``GET /snapshots`` list all snapshots (both kinds) * ``GET /snapshots`` list all snapshots (both kinds)
* ``POST /snapshots/take`` take a periodic snapshot now * ``POST /snapshots/take`` take a periodic snapshot now
* ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id``) * ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id`` and ``kind``)
* ``GET /snapshots/{id}/preview`` show metadata + delta vs current * ``GET /snapshots/{id}/preview`` show metadata + delta vs current
The ``snapshot_id`` is the filename stem (the UTC timestamp written by The ``snapshot_id`` is the filename stem (the UTC timestamp written by
:func:`chat.services.snapshot.take_snapshot`) — there's no separate UUID, :func:`chat.services.snapshot.take_snapshot`) — there's no separate UUID,
and the timestamp filename is already unique per snapshot kind. Both and the timestamp filename is already unique per snapshot kind. Both
periodic and rewind snapshots share the same id space lookup-wise, so periodic and rewind snapshots share the same id space lookup-wise, so
the restore + preview routes accept ``kind`` as a form/query param to the restore + preview routes require ``kind`` as a form/query param to
disambiguate. disambiguate (a missing/empty ``kind`` is a 400, not a silent default).
Note on ``created_at`` mtime drift: the listing's ``created_at`` comes
from the file's mtime, not the encoded filename timestamp. ``cp -p``
preserves mtime, but plain ``cp`` resets it to "now" — so a copied
snapshot can show a misleading ``created_at`` while its filename still
reflects the original UTC capture time.
""" """
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from fastapi import APIRouter, Depends, Form, HTTPException, Request from fastapi import APIRouter, Depends, Form, HTTPException, Request
@@ -52,8 +59,6 @@ def _list_all_snapshots(data_dir: Path) -> list[dict]:
``last_event_id`` (parsed from the JSON body — small enough that ``last_event_id`` (parsed from the JSON body — small enough that
listing isn't a performance concern for the handful of files we keep). listing isn't a performance concern for the handful of files we keep).
""" """
from datetime import datetime, timezone
rows: list[dict] = [] rows: list[dict] = []
for kind in SNAPSHOT_KINDS: for kind in SNAPSHOT_KINDS:
snap_dir = data_dir / "snapshots" / kind snap_dir = data_dir / "snapshots" / kind
@@ -85,12 +90,26 @@ def _list_all_snapshots(data_dir: Path) -> list[dict]:
return rows return rows
def _require_kind(kind: str) -> str:
"""Reject missing/empty/unknown ``kind`` with 400.
Defaulting silently to ``"periodic"`` made rewind-snapshot lookups
appear as 404s, which is confusing — make the client always state
the kind explicitly.
"""
if not kind or kind not in SNAPSHOT_KINDS:
raise HTTPException(
status_code=400,
detail=f"kind must be one of {SNAPSHOT_KINDS}",
)
return kind
def _resolve_snapshot_path( def _resolve_snapshot_path(
data_dir: Path, snapshot_id: str, kind: str data_dir: Path, snapshot_id: str, kind: str
) -> Path: ) -> Path:
"""Map an ``(id, kind)`` pair to the on-disk file, or 404.""" """Map an ``(id, kind)`` pair to the on-disk file, or 404."""
if kind not in SNAPSHOT_KINDS: _require_kind(kind)
raise HTTPException(status_code=400, detail=f"unknown kind: {kind}")
path = data_dir / "snapshots" / kind / f"{snapshot_id}.json" path = data_dir / "snapshots" / kind / f"{snapshot_id}.json"
if not path.exists(): if not path.exists():
raise HTTPException(status_code=404, detail="snapshot not found") raise HTTPException(status_code=404, detail="snapshot not found")
@@ -127,7 +146,7 @@ async def snapshots_restore(
snapshot_id: str, snapshot_id: str,
request: Request, request: Request,
confirm_id: str = Form(""), confirm_id: str = Form(""),
kind: str = Form("periodic"), kind: str = Form(""),
conn=Depends(get_conn), conn=Depends(get_conn),
): ):
"""Hard-confirm restore: ``confirm_id`` must equal the path id. """Hard-confirm restore: ``confirm_id`` must equal the path id.
@@ -135,7 +154,11 @@ async def snapshots_restore(
Mismatched confirm → 400 (without touching the DB). On match, the Mismatched confirm → 400 (without touching the DB). On match, the
existing :func:`restore_from_snapshot` clears projected tables and existing :func:`restore_from_snapshot` clears projected tables and
re-loads them from the dump. re-loads them from the dump.
``kind`` is required (must be ``"periodic"`` or ``"rewind"``) — a
missing or empty value 400s rather than silently defaulting.
""" """
_require_kind(kind)
if confirm_id != snapshot_id: if confirm_id != snapshot_id:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
@@ -151,7 +174,7 @@ async def snapshots_restore(
async def snapshots_preview( async def snapshots_preview(
snapshot_id: str, snapshot_id: str,
request: Request, request: Request,
kind: str = "periodic", kind: str = "",
conn=Depends(get_conn), conn=Depends(get_conn),
): ):
"""Show snapshot metadata + a basic delta against the current event log. """Show snapshot metadata + a basic delta against the current event log.
@@ -159,7 +182,10 @@ async def snapshots_preview(
Phase 4 keeps this simple: the snapshot's ``last_event_id`` plus the Phase 4 keeps this simple: the snapshot's ``last_event_id`` plus the
current ``MAX(event_log.id)`` is enough to tell the user how far the current ``MAX(event_log.id)`` is enough to tell the user how far the
log has moved on. A richer per-table diff is a Phase 4.5+ concern. log has moved on. A richer per-table diff is a Phase 4.5+ concern.
``kind`` is required — see :func:`snapshots_restore`.
""" """
_require_kind(kind)
settings = request.app.state.settings settings = request.app.state.settings
path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind) path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind)
dump = json.loads(path.read_text()) dump = json.loads(path.read_text())
+22
View File
@@ -156,6 +156,28 @@ def test_restore_snapshot_wrong_confirm_400(client, tmp_path):
assert response.status_code == 400 assert response.status_code == 400
def test_restore_without_kind_returns_400(client, tmp_path):
"""T105: Missing or empty ``kind`` must be rejected with 400.
Previously ``kind`` defaulted to ``"periodic"``, which silently 404'd
when the caller meant a rewind snapshot. Tighten the contract so the
client must always pass an explicit, valid ``kind``.
"""
db_path = tmp_path / "test.db"
_seed_bot(db_path, "bot_a", "BotA")
snapshot_path = _take_snapshot_via_service(
db_path, tmp_path, kind="periodic"
)
snapshot_id = snapshot_path.stem
response = client.post(
f"/snapshots/restore/{snapshot_id}",
data={"confirm_id": snapshot_id}, # no `kind`
follow_redirects=False,
)
assert response.status_code == 400
def test_preview_renders_metadata(client, tmp_path): def test_preview_renders_metadata(client, tmp_path):
db_path = tmp_path / "test.db" db_path = tmp_path / "test.db"
_seed_bot(db_path, "bot_a", "BotA") _seed_bot(db_path, "bot_a", "BotA")