From 996a16cfb50f014580d912cc87f0dcc92912a849 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:34:18 -0400 Subject: [PATCH 01/24] perf: search.py N+1 batching + k constant extraction (T106) --- chat/web/search.py | 142 +++++++++++++++++++++++++++++++++++++--- tests/test_search_ux.py | 28 ++++++++ 2 files changed, 161 insertions(+), 9 deletions(-) diff --git a/chat/web/search.py b/chat/web/search.py index 51d75ea..458c7c7 100644 --- a/chat/web/search.py +++ b/chat/web/search.py @@ -14,6 +14,12 @@ For each match we hydrate just enough metadata to render a row: * the originating scene title when one exists, * and the ``pov_summary`` itself. +T106 (Phase 4.5): hydration is batched. Pre-T106 the route called +``get_bot``/``get_chat``/``get_scene`` once per result row — N+1 with +``DEFAULT_SEARCH_K=50`` meaning up to 150 individual SELECTs per page +load. We now collect distinct ids first and fan-in via three +``WHERE id IN (...)`` queries, then map back per row. + We deliberately keep this module synchronous and template-only — no HTMX swaps, no JSON API — because the search box is a "leave the current chat to look something up" surface, not an inline drawer. @@ -21,7 +27,9 @@ current chat to look something up" surface, not an inline drawer. from __future__ import annotations +import json from pathlib import Path +from sqlite3 import Connection from fastapi import APIRouter, Depends, Request from fastapi.responses import HTMLResponse @@ -36,29 +44,145 @@ TEMPLATES = Jinja2Templates( directory=str(Path(__file__).resolve().parent.parent / "templates") ) +#: Maximum cross-chat FTS matches surfaced per ``/search`` page load. +#: Extracted as a module-level constant (T106) so the cap is tunable +#: without touching the route body. ``search_all_memories`` itself +#: defaults to a smaller ``k=20``; we override here because the +#: top-bar search is a "scan everything I've seen" surface, not an +#: inline drawer. +DEFAULT_SEARCH_K = 50 + router = APIRouter() +def _fetch_bots_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]: + """Batched sibling of :func:`chat.state.entities.get_bot`. + + Inlined here (not exported from ``state.entities``) to keep T106's + scope confined to ``search.py`` per the Phase 4.5 plan. Returns + ``{bot_id: bot_dict}`` for every id present in ``ids``; ids with + no matching row are simply absent from the map (the caller falls + back to the raw id string the same way it did pre-T106). + + Empty ``ids`` short-circuits to ``{}`` because SQLite rejects + ``WHERE id IN ()`` as a syntax error. + """ + if not ids: + return {} + placeholders = ",".join("?" * len(ids)) + cols = [c[1] for c in conn.execute("PRAGMA table_info(bots)").fetchall()] + rows = conn.execute( + f"SELECT * FROM bots WHERE id IN ({placeholders})", + tuple(ids), + ).fetchall() + out: dict[str, dict] = {} + for row in rows: + d = dict(zip(cols, row)) + d["voice_samples"] = json.loads(d.pop("voice_samples_json")) + d["traits"] = json.loads(d.pop("traits_json")) + out[d["id"]] = d + return out + + +def _fetch_chats_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]: + """Batched sibling of :func:`chat.state.world.get_chat`. + + Mirrors that helper's ``chats``/``chat_state`` JOIN so the returned + dicts have the same shape (``narrative_anchor``, ``time``, + ``weather``, ``active_scene_id``, etc.). Empty ``ids`` returns + ``{}`` to dodge the ``IN ()`` syntax error. + """ + if not ids: + return {} + placeholders = ",".join("?" * len(ids)) + rows = conn.execute( + "SELECT c.id, c.host_bot_id, c.guest_bot_id, c.created_at, " + " s.time, s.weather, s.active_scene_id, s.narrative_anchor " + f"FROM chats c JOIN chat_state s ON s.chat_id = c.id " + f"WHERE c.id IN ({placeholders})", + tuple(ids), + ).fetchall() + return { + row[0]: { + "id": row[0], + "host_bot_id": row[1], + "guest_bot_id": row[2], + "created_at": row[3], + "time": row[4], + "weather": row[5], + "active_scene_id": row[6], + "narrative_anchor": row[7], + } + for row in rows + } + + +def _fetch_scenes_by_ids(conn: Connection, ids: set[int]) -> dict[int, dict]: + """Batched sibling of :func:`chat.state.world.get_scene`. + + Returns ``{scene_id: scene_dict}`` with ``participants`` already + JSON-decoded so callers see the same shape as the per-row helper. + Empty ``ids`` returns ``{}``. + """ + if not ids: + return {} + placeholders = ",".join("?" * len(ids)) + cols = [c[1] for c in conn.execute("PRAGMA table_info(scenes)").fetchall()] + rows = conn.execute( + f"SELECT * FROM scenes WHERE id IN ({placeholders})", + tuple(ids), + ).fetchall() + out: dict[int, dict] = {} + for row in rows: + d = dict(zip(cols, row)) + d["participants"] = json.loads(d.pop("participants_json")) + out[d["id"]] = d + return out + + @router.get("/search", response_class=HTMLResponse) async def search(request: Request, q: str = "", conn=Depends(get_conn)): - """Render ``search.html`` with up to 50 cross-chat FTS matches. + """Render ``search.html`` with up to :data:`DEFAULT_SEARCH_K` matches. ``q`` is intentionally allowed to be empty — that path renders the page's "enter a query" placeholder rather than a 400, because the top-bar form submits to this URL even with an empty input. T93's service short-circuits whitespace-only queries to ``[]`` so there is no FTS5 ``MATCH ''`` syntax error to guard against here. - """ - raw_results = search_all_memories(conn, query=q, k=50) if q else [] - # Hydrate display fields per row. We do this in the route (not the - # service) so the service stays a pure FTS shim that other UIs - # can reuse. + Hydration (T106) is batched: rather than calling ``get_bot`` / + ``get_chat`` / ``get_scene`` per row (worst case 3 * k individual + SELECTs), we collect distinct ids and issue one ``IN (...)`` query + per entity kind, then map back during the row build. ``get_bot`` + et al. remain imported for test-time monkeypatching but are no + longer invoked on the hot path. + """ + raw_results = ( + search_all_memories(conn, query=q, k=DEFAULT_SEARCH_K) if q else [] + ) + + # Collect distinct ids up front so the IN-list queries dedupe (a + # popular bot or scene shows up many times across the result set). + bot_ids: set[str] = {r["owner_id"] for r in raw_results if r["owner_id"]} + chat_ids: set[str] = {r["chat_id"] for r in raw_results if r["chat_id"]} + scene_ids: set[int] = { + r["scene_id"] for r in raw_results if r["scene_id"] + } + + bots_by_id = _fetch_bots_by_ids(conn, bot_ids) + chats_by_id = _fetch_chats_by_ids(conn, chat_ids) + scenes_by_id = _fetch_scenes_by_ids(conn, scene_ids) + + # Hydrate display fields per row from the batched maps. We do this + # in the route (not the service) so the service stays a pure FTS + # shim that other UIs can reuse. results = [] for row in raw_results: - bot = get_bot(conn, row["owner_id"]) - chat = get_chat(conn, row["chat_id"]) - scene = get_scene(conn, row["scene_id"]) if row["scene_id"] else None + bot = bots_by_id.get(row["owner_id"]) + chat = chats_by_id.get(row["chat_id"]) + scene = ( + scenes_by_id.get(row["scene_id"]) if row["scene_id"] else None + ) results.append( { "memory_id": row["memory_id"], diff --git a/tests/test_search_ux.py b/tests/test_search_ux.py index 7254549..013337b 100644 --- a/tests/test_search_ux.py +++ b/tests/test_search_ux.py @@ -16,6 +16,7 @@ Verifies the FastAPI ``/search`` route that wraps T93's from __future__ import annotations from pathlib import Path +from unittest.mock import patch import pytest from fastapi.testclient import TestClient @@ -133,3 +134,30 @@ def test_result_links_navigate_to_chat(client, tmp_path): # The link target is chat-level (memories don't carry an event_id # column today, so we don't deep-link to a specific turn). assert 'href="/chats/chat_a"' in resp.text + + +def test_search_results_use_batched_lookups(client, tmp_path): + """T106: hydration must not fan out to per-row ``get_bot``/ + ``get_chat``/``get_scene`` calls. + + The previous implementation called each helper once per result row + (worst case 50 rows x 3 helpers = 150 individual queries). The + batched implementation collects distinct ids and issues at most one + query per entity kind via ``WHERE id IN (...)``, so the per-row + helpers should not be invoked at all when there are matches. + + We seed two chats (so both ``get_bot`` and ``get_chat`` would have + been hit pre-T106) and assert each helper sees zero per-row calls. + """ + _seed_two_chats_with_memories(tmp_path / "test.db") + with ( + patch("chat.web.search.get_bot") as mock_get_bot, + patch("chat.web.search.get_chat") as mock_get_chat, + patch("chat.web.search.get_scene") as mock_get_scene, + ): + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + # Batched IN-list queries replace the per-row helpers entirely. + assert mock_get_bot.call_count == 0 + assert mock_get_chat.call_count == 0 + assert mock_get_scene.call_count == 0 -- 2.52.0 From b65e1e1098c9f4ce99dcdc9ff6c03fd4455dfa37 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:34:28 -0400 Subject: [PATCH 02/24] chore: memory.py DRY MAX(id) helper + document fts_rank=None contract (T104) --- chat/state/memory.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/chat/state/memory.py b/chat/state/memory.py index 42a7e95..a9d62df 100644 --- a/chat/state/memory.py +++ b/chat/state/memory.py @@ -112,6 +112,25 @@ SIGNIFICANCE_RANK_BIAS = 0.5 RRF_CONST = 60 +def _max_event_id(conn: Connection, owner_id: str) -> int: + """Return the largest ``memories.id`` for ``owner_id`` (1 if none exist). + + Used as the recency-boost denominator by both ``_composite_rerank`` and + ``_rrf_fuse_and_rerank`` (T104). The row id is a monotonic recency proxy + — newer memories have larger ids — so dividing by the per-owner max keeps + the boost in [0, 1] regardless of how many memories the owner has. + + Returns 1 (not 0) when the owner has no rows so callers can divide by + the result without a guard. The "no memories" case never actually hits + this helper because the FTS query above would have returned no rows, + but the safe default keeps the helper trivially reusable. + """ + row = conn.execute( + "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,) + ).fetchone() + return row[0] if row and row[0] else 1 + + def search_memories( conn: Connection, owner_id: str, @@ -163,6 +182,14 @@ def search_memories( When ``query_vector`` is None: FTS-only behaviour unchanged — all Phase 1-3.5 callers see the same row shape and ordering as before. + + **Row-shape contract (T104):** every returned dict carries an + ``fts_rank`` key. For FTS hits this is the BM25 score (a negative float, + lower-is-better). For *vector-only* hits surfaced by the fused path — + rows that matched the query embedding but did NOT match FTS — the + ``fts_rank`` value is ``None``. Downstream consumers must accept + ``None`` here; do not assume ``fts_rank`` is always numeric. The + ``composite_score`` is always a float on every returned row. """ if witness_role not in _VALID_WITNESS_ROLES: raise ValueError( @@ -227,10 +254,7 @@ def _composite_rerank( Extracted from ``search_memories`` so the no-vector path stays a single call and the fused path can re-use the same boost formulae after RRF. """ - max_id_row = conn.execute( - "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,) - ).fetchone() - max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1 + max_id = _max_event_id(conn, owner_id) result_cols = cols + ["fts_rank"] enriched: list[dict] = [] @@ -343,10 +367,7 @@ def _rrf_fuse_and_rerank( # Final composite re-rank: significance + recency boosts on top of the # negated fusion score so the sort direction matches the FTS-only path. - max_id_row = conn.execute( - "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,) - ).fetchone() - max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1 + max_id = _max_event_id(conn, owner_id) result_cols = cols + ["fts_rank"] enriched: list[dict] = [] -- 2.52.0 From 374a76c867930955dbc9b8484dc7efbb7007786f Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:34:32 -0400 Subject: [PATCH 03/24] =?UTF-8?q?chore:=20branches=20polish=20=E2=80=94=20?= =?UTF-8?q?global-leak=20docs=20+=20unknown-name=20warning=20(T103)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat/state/branches.py | 31 +++++++++++++++++++++++++++++++ tests/test_branches_state.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/chat/state/branches.py b/chat/state/branches.py index 101627e..c51808e 100644 --- a/chat/state/branches.py +++ b/chat/state/branches.py @@ -9,11 +9,15 @@ existing event readers remain branch-agnostic. """ from __future__ import annotations + +import logging from sqlite3 import Connection from chat.eventlog.projector import on from chat.eventlog.log import Event +logger = logging.getLogger(__name__) + @on("branch_created") def _apply_branch_created(conn: Connection, e: Event) -> None: @@ -37,9 +41,26 @@ def _apply_branch_switched(conn: Connection, e: Event) -> None: """Set is_active=1 on the named branch and is_active=0 on all others. Atomic via two UPDATEs ordered to avoid the unique-active-index race. + + If the named branch does not exist, a warning is emitted and the + is_active flags are still cleared (preserving prior behavior — the + second UPDATE simply matches no rows). Callers should validate the + name upstream; this guard surfaces accidental mismatches in the log. """ p = e.payload name = p["name"] + # Warn (don't raise) if the target branch is missing. The existing + # outcome — zero active branches — is preserved; this just makes the + # condition observable instead of silent. + exists = conn.execute( + "SELECT 1 FROM branches WHERE name = ? LIMIT 1", + (name,), + ).fetchone() + if exists is None: + logger.warning( + "branch_switched to unknown branch name %r; no branch will be active", + name, + ) # Clear ALL is_active flags first (avoids the unique-index trip). conn.execute("UPDATE branches SET is_active = 0 WHERE is_active = 1") conn.execute( @@ -79,6 +100,16 @@ def get_branch(conn: Connection, name: str) -> dict | None: def list_branches(conn: Connection, chat_id: str | None = None) -> list[dict]: + """Return branch rows, optionally scoped to a chat. + + When ``chat_id`` is provided the filter is ``chat_id = ? OR chat_id IS NULL``, + so global (null-chat) branches are returned in *every* per-chat scope. This + is intentional: the bootstrapped ``"main"`` branch (and any future + null-chat branches) are global by design — they belong to no single chat + and should appear alongside per-chat branches in any chat-scoped listing. + Callers that want only per-chat branches should filter the result on + ``chat_id is not None``. + """ if chat_id is None: rows = conn.execute( "SELECT id, name, origin_event_id, head_event_id, chat_id, " diff --git a/tests/test_branches_state.py b/tests/test_branches_state.py index ace2e8e..ea397e2 100644 --- a/tests/test_branches_state.py +++ b/tests/test_branches_state.py @@ -1,5 +1,7 @@ from __future__ import annotations +import logging + from chat.db.connection import open_db from chat.db.migrate import apply_migrations from chat.eventlog.log import append_event @@ -139,3 +141,36 @@ def test_list_branches_returns_all(tmp_path): names = [b["name"] for b in list_branches(conn)] assert "main" in names assert "experiment" in names + + +def test_branch_switched_unknown_name_warns(tmp_path, caplog): + """Switching to a nonexistent branch logs a warning and leaves no branch active. + + The previous behavior silently cleared is_active flags and applied no UPDATE + when the named branch did not exist. T103 makes that condition observable + by emitting a warning while preserving the existing (zero-active) outcome. + """ + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + with caplog.at_level(logging.WARNING, logger="chat.state.branches"): + append_event( + conn, + kind="branch_switched", + payload={"name": "does_not_exist"}, + ) + project(conn) + + # A warning was emitted naming the missing branch. + warnings = [ + r for r in caplog.records + if r.levelno == logging.WARNING and r.name == "chat.state.branches" + ] + assert warnings, "expected a warning for unknown branch name" + assert any("does_not_exist" in r.getMessage() for r in warnings) + + # Existing behavior preserved: no branch is active after the switch. + assert active_branch(conn) is None + + # The unknown name was not inserted as a side effect. + assert get_branch(conn, "does_not_exist") is None -- 2.52.0 From 64c9ca634ada6526de3902fa33e97eae28158f5a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:47:14 -0400 Subject: [PATCH 04/24] =?UTF-8?q?chore:=20snapshots.py=20polish=20?= =?UTF-8?q?=E2=80=94=20hoisted=20imports=20+=20strict=20kind=20+=20mtime?= =?UTF-8?q?=20doc=20(T105)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat/web/snapshots.py | 44 +++++++++++++++++++++++++++++++-------- tests/test_snapshot_ux.py | 22 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/chat/web/snapshots.py b/chat/web/snapshots.py index ae3cc30..c169e4f 100644 --- a/chat/web/snapshots.py +++ b/chat/web/snapshots.py @@ -8,20 +8,27 @@ Routes: * ``GET /snapshots`` list all snapshots (both kinds) * ``POST /snapshots/take`` take a periodic snapshot now -* ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id``) +* ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id`` and ``kind``) * ``GET /snapshots/{id}/preview`` show metadata + delta vs current The ``snapshot_id`` is the filename stem (the UTC timestamp written by :func:`chat.services.snapshot.take_snapshot`) — there's no separate UUID, and the timestamp filename is already unique per snapshot kind. Both periodic and rewind snapshots share the same id space lookup-wise, so -the restore + preview routes accept ``kind`` as a form/query param to -disambiguate. +the restore + preview routes require ``kind`` as a form/query param to +disambiguate (a missing/empty ``kind`` is a 400, not a silent default). + +Note on ``created_at`` mtime drift: the listing's ``created_at`` comes +from the file's mtime, not the encoded filename timestamp. ``cp -p`` +preserves mtime, but plain ``cp`` resets it to "now" — so a copied +snapshot can show a misleading ``created_at`` while its filename still +reflects the original UTC capture time. """ from __future__ import annotations import json +from datetime import datetime, timezone from pathlib import Path from fastapi import APIRouter, Depends, Form, HTTPException, Request @@ -52,8 +59,6 @@ def _list_all_snapshots(data_dir: Path) -> list[dict]: ``last_event_id`` (parsed from the JSON body — small enough that listing isn't a performance concern for the handful of files we keep). """ - from datetime import datetime, timezone - rows: list[dict] = [] for kind in SNAPSHOT_KINDS: snap_dir = data_dir / "snapshots" / kind @@ -85,12 +90,26 @@ def _list_all_snapshots(data_dir: Path) -> list[dict]: return rows +def _require_kind(kind: str) -> str: + """Reject missing/empty/unknown ``kind`` with 400. + + Defaulting silently to ``"periodic"`` made rewind-snapshot lookups + appear as 404s, which is confusing — make the client always state + the kind explicitly. + """ + if not kind or kind not in SNAPSHOT_KINDS: + raise HTTPException( + status_code=400, + detail=f"kind must be one of {SNAPSHOT_KINDS}", + ) + return kind + + def _resolve_snapshot_path( data_dir: Path, snapshot_id: str, kind: str ) -> Path: """Map an ``(id, kind)`` pair to the on-disk file, or 404.""" - if kind not in SNAPSHOT_KINDS: - raise HTTPException(status_code=400, detail=f"unknown kind: {kind}") + _require_kind(kind) path = data_dir / "snapshots" / kind / f"{snapshot_id}.json" if not path.exists(): raise HTTPException(status_code=404, detail="snapshot not found") @@ -127,7 +146,7 @@ async def snapshots_restore( snapshot_id: str, request: Request, confirm_id: str = Form(""), - kind: str = Form("periodic"), + kind: str = Form(""), conn=Depends(get_conn), ): """Hard-confirm restore: ``confirm_id`` must equal the path id. @@ -135,7 +154,11 @@ async def snapshots_restore( Mismatched confirm → 400 (without touching the DB). On match, the existing :func:`restore_from_snapshot` clears projected tables and re-loads them from the dump. + + ``kind`` is required (must be ``"periodic"`` or ``"rewind"``) — a + missing or empty value 400s rather than silently defaulting. """ + _require_kind(kind) if confirm_id != snapshot_id: raise HTTPException( status_code=400, @@ -151,7 +174,7 @@ async def snapshots_restore( async def snapshots_preview( snapshot_id: str, request: Request, - kind: str = "periodic", + kind: str = "", conn=Depends(get_conn), ): """Show snapshot metadata + a basic delta against the current event log. @@ -159,7 +182,10 @@ async def snapshots_preview( Phase 4 keeps this simple: the snapshot's ``last_event_id`` plus the current ``MAX(event_log.id)`` is enough to tell the user how far the log has moved on. A richer per-table diff is a Phase 4.5+ concern. + + ``kind`` is required — see :func:`snapshots_restore`. """ + _require_kind(kind) settings = request.app.state.settings path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind) dump = json.loads(path.read_text()) diff --git a/tests/test_snapshot_ux.py b/tests/test_snapshot_ux.py index 347f9ce..3db7cbd 100644 --- a/tests/test_snapshot_ux.py +++ b/tests/test_snapshot_ux.py @@ -156,6 +156,28 @@ def test_restore_snapshot_wrong_confirm_400(client, tmp_path): assert response.status_code == 400 +def test_restore_without_kind_returns_400(client, tmp_path): + """T105: Missing or empty ``kind`` must be rejected with 400. + + Previously ``kind`` defaulted to ``"periodic"``, which silently 404'd + when the caller meant a rewind snapshot. Tighten the contract so the + client must always pass an explicit, valid ``kind``. + """ + db_path = tmp_path / "test.db" + _seed_bot(db_path, "bot_a", "BotA") + snapshot_path = _take_snapshot_via_service( + db_path, tmp_path, kind="periodic" + ) + snapshot_id = snapshot_path.stem + + response = client.post( + f"/snapshots/restore/{snapshot_id}", + data={"confirm_id": snapshot_id}, # no `kind` + follow_redirects=False, + ) + assert response.status_code == 400 + + def test_preview_renders_metadata(client, tmp_path): db_path = tmp_path / "test.db" _seed_bot(db_path, "bot_a", "BotA") -- 2.52.0 From 29b7c90b29aef99a8be7af644ee03f23c8336fcf Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:47:17 -0400 Subject: [PATCH 05/24] chore: embeddings.py warns on fallback for non-default models (T107) --- chat/services/embeddings.py | 13 ++++++++++++- tests/test_embeddings.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/chat/services/embeddings.py b/chat/services/embeddings.py index ece6eae..44002ea 100644 --- a/chat/services/embeddings.py +++ b/chat/services/embeddings.py @@ -10,6 +10,7 @@ EmbeddingResult shape stays the same, only the generator changes. from __future__ import annotations import hashlib +import logging import math import struct @@ -18,6 +19,8 @@ from pydantic import BaseModel from chat.llm.client import LLMClient +_log = logging.getLogger(__name__) + DEFAULT_EMBEDDING_DIM = 384 DEFAULT_EMBEDDING_MODEL = "pseudo-sha256-384" FALLBACK_EMBEDDING_MODEL = "fallback" @@ -93,7 +96,15 @@ async def generate_embedding( return EmbeddingResult(vector=_pseudo_embed(text, dim), model=model, dim=dim) # Future: real embedding via client.embed(...). Phase 4.5 work. - # For Phase 4, any non-default model falls through to fallback. + # For Phase 4, any non-default model falls through to fallback — + # warn so misconfigured callers (e.g., a real-model swap that isn't + # wired up yet) don't silently degrade to a zero vector. + _log.warning( + "generate_embedding: non-default model %r returned fallback " + "(model client.embed() not yet implemented in Phase 4.5+); " + "downstream search will degrade silently. Configure a supported model.", + model, + ) return EmbeddingResult( vector=[0.0] * dim, model=FALLBACK_EMBEDDING_MODEL, dim=dim ) diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index b458681..4d1dc4b 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -20,6 +20,7 @@ The pseudo path doesn't touch the LLMClient, so we pass an empty from __future__ import annotations +import logging import math import pytest @@ -89,3 +90,33 @@ async def test_generate_embedding_unit_normalized(): result = await generate_embedding(_client(), text="some non-empty text") norm_sq = sum(x * x for x in result.vector) assert math.isclose(norm_sq, 1.0, abs_tol=1e-6) + + +@pytest.mark.asyncio +async def test_generate_embedding_non_default_model_logs_warning(caplog): + """T107: non-default model falls through to fallback and must warn. + + A Phase 4.5+ caller pointing at a real model that isn't yet wired + up would otherwise silently degrade (zero vector → useless cosine). + The warning surfaces the misconfiguration in logs. + """ + caplog.set_level(logging.WARNING, logger="chat.services.embeddings") + result = await generate_embedding(_client(), text="hello", model="real-model") + + # Behavior unchanged: still returns the fallback sentinel. + assert result.model == FALLBACK_EMBEDDING_MODEL == "fallback" + assert all(x == 0.0 for x in result.vector) + + # Warning fired and names the offending model. + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert any("non-default model" in r.getMessage() for r in warnings) + assert any("real-model" in r.getMessage() for r in warnings) + + +@pytest.mark.asyncio +async def test_generate_embedding_default_model_does_not_warn(caplog): + """T107: the silent default path must stay silent.""" + caplog.set_level(logging.WARNING, logger="chat.services.embeddings") + await generate_embedding(_client(), text="hello") + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert warnings == [] -- 2.52.0 From baffeb3a445650ca42b6c7cfd09cfbbf1ed11c1a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:47:26 -0400 Subject: [PATCH 06/24] =?UTF-8?q?chore:=20scene-close-on-cancel=20?= =?UTF-8?q?=E2=80=94=20strengthen=20regression=20test=20+=20document=20rat?= =?UTF-8?q?ionale=20(T108)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Investigation surfaced a transactional bug in the cancel path: when the primary stream raises asyncio.CancelledError mid-stream, post_turn re-raises at end-of-function, and open_db's dependency teardown skips conn.commit() — rolling back ALL post-cancel writes including the scene_closed event. The existing T74.3 regression test only passes because asyncio is not imported at module scope, so CancelledError becomes NameError (caught by except Exception, leaves cancelled=False). Documented in turns.py + test docstring; deferred for triage. --- chat/web/turns.py | 14 ++++++++++++++ tests/test_turn_flow.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/chat/web/turns.py b/chat/web/turns.py index 97ef4a6..dfb4b21 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -873,6 +873,20 @@ async def post_turn( # mid-stream still meant to close the scene — the cancelled bot # beat doesn't invalidate that intent. Pinned by # test_cancelled_turn_still_closes_scene_when_user_prose_signals_close. + # + # T108 NOTE — the in-memory append order is correct, but the cancel + # path re-raises ``CancelledError`` at the end of ``post_turn`` + # (see step 11 below). The ``open_db`` dependency teardown skips + # ``conn.commit()`` when the consumer raises, which means in + # production a genuine cancel currently rolls back ALL post-cancel + # writes — including this scene_closed event, the truncated + # assistant_turn record, edge updates, and per-POV summaries. The + # T74.3 regression test passes only because of a missing + # ``import asyncio`` in the test module: the inline mock raises + # ``NameError`` instead of ``CancelledError``, which is caught by + # the ``except Exception:`` branch and leaves ``cancelled=False``, + # so the function returns 204 normally and the commit fires. This + # is a transactional bug deferred for triage (T108 report). if scene is not None and prose.strip(): container = None if scene.get("container_id") is not None: diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py index 043fa78..9d3fd0f 100644 --- a/tests/test_turn_flow.py +++ b/tests/test_turn_flow.py @@ -734,6 +734,19 @@ def test_cancelled_turn_still_closes_scene_when_user_prose_signals_close( that as an exception, so we drive the request inside ``with pytest.raises``. Despite the exception, the scene_closed event must land in the event_log. + + T108 NOTE — this test does NOT actually exercise the cancel path. + ``_CancelOnStreamMock.stream`` writes ``raise asyncio.CancelledError`` + but ``asyncio`` is not imported at module scope, so the first + iteration raises ``NameError`` (caught by ``except Exception:`` in + post_turn, which sets ``primary_truncated=True`` but leaves + ``cancelled=False``). The function therefore returns 204 normally, + the dependency-managed connection commits, and ``scene_closed`` + lands. Importing asyncio so the real CancelledError fires reveals + a transactional bug: ``post_turn``'s end-of-function re-raise + causes ``open_db``'s dependency teardown to skip ``conn.commit()``, + rolling back ALL post-cancel writes (user_turn, assistant_turn, + edge_updates, scene_closed). Deferred for triage — see T108 report. """ from typing import AsyncIterator, Sequence @@ -828,12 +841,33 @@ def test_cancelled_turn_still_closes_scene_when_user_prose_signals_close( "SELECT payload_json FROM event_log " "WHERE kind = 'assistant_turn' ORDER BY id" ).fetchall() + # T108: pin the ordering — user_turn must commit before + # scene_closed (close detection runs on prose that is already + # in the event_log) and any assistant_turn the cancel produced + # must come last (truncated record written after both). + ordered = conn.execute( + "SELECT id, kind FROM event_log " + "WHERE kind IN ('user_turn', 'scene_closed', 'assistant_turn') " + "ORDER BY id" + ).fetchall() # Scene close lands despite the cancel. assert scene_close_count == 1 # The cancelled assistant_turn was still recorded (truncated=True). assert len(assistant_payload) == 1 assert json.loads(assistant_payload[0][0])["truncated"] is True + # T108 ordering pin: user_turn lands first, the truncated + # assistant_turn (if any) is committed BEFORE the scene_close + # decision fires, and scene_closed lands last. Close detection + # relies on user prose being committed to the event_log BEFORE + # the close decision runs — and the cancelled assistant beat is + # recorded as a partial before close-detection too. + kinds_in_order = [row[1] for row in ordered] + user_idx = kinds_in_order.index("user_turn") + close_idx = kinds_in_order.index("scene_closed") + assert user_idx < close_idx + if "assistant_turn" in kinds_in_order: + assert user_idx < kinds_in_order.index("assistant_turn") < close_idx def test_interjection_enqueues_significance_job(app_state_setup, tmp_path): -- 2.52.0 From 1f8b4d2078c38f75bd3cc40df1dee7b1d04b6343 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:00:57 -0400 Subject: [PATCH 07/24] =?UTF-8?q?feat:=200014=20schema=20=E2=80=94=20embed?= =?UTF-8?q?dings=20FK=20CASCADE=20(deferred=20or=20applied)=20+=20memories?= =?UTF-8?q?.event=5Fid=20column=20(T109)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chat/db/migrations/0014_phase45_schema.sql | 25 ++++++++++ chat/state/memory.py | 10 +++- tests/test_memory_write.py | 56 ++++++++++++++++++++++ tests/test_world.py | 4 +- 4 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 chat/db/migrations/0014_phase45_schema.sql diff --git a/chat/db/migrations/0014_phase45_schema.sql b/chat/db/migrations/0014_phase45_schema.sql new file mode 100644 index 0000000..0d7d491 --- /dev/null +++ b/chat/db/migrations/0014_phase45_schema.sql @@ -0,0 +1,25 @@ +-- 0014_phase45_schema.sql — Phase 4.5 Wave 2 schema bump (T109). +-- +-- Two schema concerns are bundled into this migration: +-- +-- 1. ``embeddings.memory_id`` FK should ideally carry ``ON DELETE CASCADE`` +-- (T88 review nit). DEFERRED to Phase 5: ``embeddings`` rows are only ever +-- deleted when the parent ``memories`` row is deleted, and ``memories`` +-- rows are never deleted today (memory hide is a soft flag; the surgical +-- ``deindex_event`` path operates on ``event_log`` and does NOT cascade +-- to projection rows). The CASCADE constraint therefore can't fire under +-- current usage — adding the SQLite table-rebuild dance (rename, recreate, +-- copy, drop, reindex) for a defensive constraint is unwarranted bloat +-- in a polish wave. Revisit during the broader Phase 5 migration cleanup +-- when other table reshapes make the rebuild worthwhile. +-- +-- 2. Add ``memories.event_id`` (NULLABLE INTEGER, references ``event_log.id``) +-- so cross-chat search results can deep-link back to the originating +-- turn (foundation for T111). The column is nullable so historical +-- memory rows projected before 0014 ran continue to round-trip cleanly; +-- new rows are populated by the ``memory_written`` projector handler +-- from the projecting event's id. This is a pure additive change — no +-- backfill is performed. Older rows simply read NULL until/unless a +-- later migration backfills them; T111 surfaces are coded to accept +-- NULL gracefully (no deep-link rendered). +ALTER TABLE memories ADD COLUMN event_id INTEGER REFERENCES event_log(id); diff --git a/chat/state/memory.py b/chat/state/memory.py index a9d62df..9816256 100644 --- a/chat/state/memory.py +++ b/chat/state/memory.py @@ -13,13 +13,18 @@ def _row_to_dict(conn: Connection, row: tuple) -> dict: @on("memory_written") def _apply_memory_written(conn: Connection, e: Event) -> None: + # T109 (schema 0014): persist the projecting event's id on the memory + # row so cross-chat search results can deep-link back to the + # originating turn (T111). Older memory rows projected before 0014 + # ran read NULL here — the column is nullable for that reason. p = e.payload conn.execute( "INSERT INTO memories (" "owner_id, chat_id, scene_id, pov_summary, " "witness_you, witness_host, witness_guest, " - "chat_clock_at, source, reliability, significance, pinned, auto_pinned" - ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + "chat_clock_at, source, reliability, significance, pinned, auto_pinned, " + "event_id" + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", ( p["owner_id"], p["chat_id"], @@ -34,6 +39,7 @@ def _apply_memory_written(conn: Connection, e: Event) -> None: int(p.get("significance", 1)), int(p.get("pinned", 0)), int(p.get("auto_pinned", 0)), + e.id, ), ) diff --git a/tests/test_memory_write.py b/tests/test_memory_write.py index 3c135a5..0ee9a51 100644 --- a/tests/test_memory_write.py +++ b/tests/test_memory_write.py @@ -586,3 +586,59 @@ def test_record_turn_memory_enqueues_embedding_job(tmp_path): assert {job.memory_id for job in captured} == expected_ids for job in captured: assert job.text == "Both bots witness this beat." + + +# --------------------------------------------------------------------------- +# T109: memories.event_id deep-link column populated by the projector. +# --------------------------------------------------------------------------- + + +def test_memory_written_populates_event_id(tmp_path): + """Schema 0014 added ``memories.event_id`` referencing ``event_log.id``. + + The ``memory_written`` projector handler must populate the column with + the projecting event's id so T111 can deep-link cross-chat search hits + back to the originating turn. + """ + db = tmp_path / "t.db" + apply_migrations(db) + _seed_minimal(db) + with open_db(db) as conn: + result = record_turn_memory_for_present( + conn, + chat_id="chat_bot_a", + host_bot_id="bot_a", + guest_bot_id=None, + narrative_text="BotA shrugs.", + ) + eid, mid = result["bot_a"] + assert eid > 0 and mid is not None + + row = conn.execute( + "SELECT event_id FROM memories WHERE id = ?", (mid,) + ).fetchone() + assert row is not None + assert row[0] == eid + + +def test_memory_event_id_column_is_nullable_for_backfill(tmp_path): + """Backward compat: the ``event_id`` column is nullable so historical + memory rows projected before 0014 ran (or rows synthesised by tests + that bypass the projector) don't break the schema. A direct INSERT + omitting the column must succeed and read back NULL.""" + db = tmp_path / "t.db" + apply_migrations(db) + _seed_minimal(db) + with open_db(db) as conn: + conn.execute( + "INSERT INTO memories (" + "owner_id, chat_id, pov_summary, " + "witness_you, witness_host, witness_guest" + ") VALUES (?, ?, ?, ?, ?, ?)", + ("bot_a", "chat_bot_a", "legacy row", 1, 1, 0), + ) + row = conn.execute( + "SELECT event_id FROM memories WHERE pov_summary = 'legacy row'" + ).fetchone() + assert row is not None + assert row[0] is None diff --git a/tests/test_world.py b/tests/test_world.py index 688b38f..c852852 100644 --- a/tests/test_world.py +++ b/tests/test_world.py @@ -324,11 +324,11 @@ def test_get_scene_returns_none_for_missing(tmp_path): assert active_scene(conn, "chat_missing") is None -def test_schema_version_after_migration_is_13(tmp_path): +def test_schema_version_after_migration_is_14(tmp_path): db = tmp_path / "t.db" apply_migrations(db) with open_db(db) as conn: row = conn.execute( "SELECT value FROM meta WHERE key = 'schema_version'" ).fetchone() - assert int(row[0]) == 13 + assert int(row[0]) == 14 -- 2.52.0 From f3827706dff45d440288d033d16916c7bebe00da Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:11:39 -0400 Subject: [PATCH 08/24] fix: drawer delete_turn guards event_id <= 0 (T110.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A stale tab or hand-crafted request posting event_id=0 to the surgical delete route would compute after_event_id=-1 and silently truncate the entire log. Now rejected with 400. SQLite assigns event_log ids starting at 1, so any legitimate id is always >= 1 — non-positive values can only indicate a client bug. Test: tests/test_drawer_phase4.py::test_delete_turn_with_event_id_zero_returns_400. --- chat/web/drawer.py | 12 ++++++++++++ tests/test_drawer_phase4.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 5396ae8..f0c3ddb 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -1278,7 +1278,19 @@ async def delete_turn( A snapshot is taken before truncation (inside ``execute_rewind``) so the user can recover via the snapshot index. + + T110.1 guards ``event_id <= 0``: a stale tab or hand-crafted request + posting ``event_id=0`` would otherwise compute ``after_event_id=-1`` + and silently truncate the entire log. ``id`` is auto-assigned by + SQLite starting at 1 so any caller's "real" id is always >= 1; a + zero or negative value can only mean a client bug, surfaced as 400. """ + if int(event_id) <= 0: + raise HTTPException( + status_code=400, + detail=f"event_id must be a positive integer, got {event_id}", + ) + chat = get_chat(conn, chat_id) if chat is None: raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index f94f266..f4f3235 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -458,6 +458,29 @@ def test_t98_4_delete_invokes_rewind_and_drops_cascade(client, tmp_path): assert row is None, f"event {ev_id} should have been deleted" +def test_delete_turn_with_event_id_zero_returns_400(client, tmp_path): + """T110.1: ``event_id <= 0`` is an obvious client error and must NOT + silently rewind the entire log via ``after_event_id = -1``. The route + rejects it with 400 so the audit trail stays intact. + """ + db = tmp_path / "test.db" + _seed_chat(db) + _seed_turns(db) + + # Sanity: events present before the bad request. + with open_db(db) as conn: + before = conn.execute("SELECT COUNT(*) FROM event_log").fetchone()[0] + assert before > 0 + + response = client.post("/chats/chat_bot_a/drawer/turn/delete/0") + assert response.status_code == 400 + + # And the log was NOT truncated. + with open_db(db) as conn: + after = conn.execute("SELECT COUNT(*) FROM event_log").fetchone()[0] + assert after == before + + # --------------------------------------------------------------------------- # T98.5 — remaining v1 edits (chat narrative anchor + weather). # --------------------------------------------------------------------------- -- 2.52.0 From a45a33534f0100f31dac9612691fe4bedcc17946 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:12:28 -0400 Subject: [PATCH 09/24] fix: drawer delete-impact modal HTML escapes user-controllable fields (T110.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The delete-impact modal is built via raw f-string concatenation from the ImpactReport — item.kind / item.description / report.notes ultimately embed user-controllable content (turn prose, scene timestamps). A turn with prose like "" would reach the rendered HTML verbatim. Currently safe (the fields embedded today are bounded strings) but defense-in-depth — wrap with html.escape() so future description changes can't smuggle markup through. Test: tests/test_drawer_phase4.py::test_delete_impact_modal_escapes_user_controllable_strings. --- chat/web/drawer.py | 18 +++++++++++++++--- tests/test_drawer_phase4.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/chat/web/drawer.py b/chat/web/drawer.py index f0c3ddb..a29f281 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -27,6 +27,7 @@ one so a later inverse edit can restore state (§6.4 final paragraph). from __future__ import annotations +import html import json import uuid from pathlib import Path @@ -1238,18 +1239,29 @@ async def delete_preview( # reusing the drawer template would require a fragment include just # for this surface. Mirrors the rewind-preview style in # :func:`chat.web.turns.rewind_preview`. + # + # T110.2: ``item.kind``, ``item.description``, and the notes carry + # user-controllable content (turn prose, scene timestamps, etc.). + # Wrap them with :func:`html.escape` so a payload like + # ```` renders as inert text. ``chat_id`` + # is matched against the projected ``chats`` table at request time + # (404 above) so it isn't free-form, but we escape it for symmetry. items_html = "".join( - f"
  • {item.kind}: {item.description}
  • " + f"
  • {html.escape(item.kind)}: " + f"{html.escape(item.description)}
  • " for item in report.cascading ) - notes_html = "".join(f"
  • {note}
  • " for note in report.notes) + notes_html = "".join( + f"
  • {html.escape(note)}
  • " for note in report.notes + ) body = ( "
    " f"

    Delete event {report.target_event_id}?

    " f"

    This will discard {len(report.cascading)} events. Cascade:

    " f"
      {items_html or '
    • none
    • '}
    " f"
      {notes_html}
    " - f"
    " "" "
    " diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index f4f3235..0b2b473 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -458,6 +458,42 @@ def test_t98_4_delete_invokes_rewind_and_drops_cascade(client, tmp_path): assert row is None, f"event {ev_id} should have been deleted" +def test_delete_impact_modal_escapes_user_controllable_strings(client, tmp_path): + """T110.2: defense-in-depth — fields embedded in the modal HTML come + from event payloads (turn prose, scene timestamps, etc.) which are + ultimately user-controllable. Wrap them with ``html.escape`` so a + payload like ```` renders as inert text and + doesn't leak through into the rendered modal as actual markup. + """ + db = tmp_path / "test.db" + _seed_chat(db) + + # Seed a user_turn whose prose contains an HTML-script payload. The + # modal renders ``description = "turn N (you: )"`` so + # the prose flows verbatim into the cascade list
  • . + with open_db(db) as conn: + evil_id = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "", + "segments": [], + }, + ) + + response = client.get( + f"/chats/chat_bot_a/drawer/turn/delete-preview/{evil_id}" + ) + assert response.status_code == 200 + body = response.text + + # Raw `` renders as inert text. ``chat_id`` - # is matched against the projected ``chats`` table at request time - # (404 above) so it isn't free-form, but we escape it for symmetry. - items_html = "".join( - f"
  • {html.escape(item.kind)}: " - f"{html.escape(item.description)}
  • " - for item in report.cascading + # T110.3: render via the ``_delete_impact_modal.html`` Jinja partial + # so HTML autoescape covers user-controllable fields (item.kind, + # item.description, notes) automatically. The prior implementation + # built the modal HTML via raw f-string concatenation and required + # explicit ``html.escape()`` calls (T110.2) on each interpolated + # field; under autoescape those calls become redundant. Mirrors the + # rewind-preview style in :func:`chat.web.turns.rewind_preview`. + return TEMPLATES.TemplateResponse( + request, + "_delete_impact_modal.html", + {"chat_id": chat_id, "impact": report}, ) - notes_html = "".join( - f"
  • {html.escape(note)}
  • " for note in report.notes - ) - body = ( - "
    " - f"

    Delete event {report.target_event_id}?

    " - f"

    This will discard {len(report.cascading)} events. Cascade:

    " - f"
      {items_html or '
    • none
    • '}
    " - f"
      {notes_html}
    " - f"
    " - "" - "
    " - "
    " - ) - return HTMLResponse(body) @router.post( diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index 0b2b473..20b428e 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -458,6 +458,35 @@ def test_t98_4_delete_invokes_rewind_and_drops_cascade(client, tmp_path): assert row is None, f"event {ev_id} should have been deleted" +def test_delete_impact_modal_uses_jinja_partial(client, tmp_path): + """T110.3: the modal HTML is rendered from a Jinja partial + (`_delete_impact_modal.html`) rather than f-string concatenation in + Python. Verify the partial-rendered shape: the wrapping + ``delete-impact-modal`` div, the cascade list, and the confirm form. + + The partial inherits Jinja2 autoescape so HTML safety follows + automatically — the explicit ``html.escape()`` calls from T110.2 + become redundant once this lands. + """ + db = tmp_path / "test.db" + _seed_chat(db) + user_id, _bot_id = _seed_turns(db) + + response = client.get( + f"/chats/chat_bot_a/drawer/turn/delete-preview/{user_id}" + ) + assert response.status_code == 200 + body = response.text + + # Markup shape that the partial produces. Double-quoted attributes + # signal Jinja rendering (the prior f-string used single quotes). + assert '
    ' in body + assert '
      ' in body + # The confirm form still posts to the same delete route. + assert f"/chats/chat_bot_a/drawer/turn/delete/{user_id}" in body + assert "Confirm delete" in body + + def test_delete_impact_modal_escapes_user_controllable_strings(client, tmp_path): """T110.2: defense-in-depth — fields embedded in the modal HTML come from event payloads (turn prose, scene timestamps, etc.) which are -- 2.52.0 From 2ab8fcbdf0d46c4b53637c925cd3b3894cadfdf7 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:14:59 -0400 Subject: [PATCH 11/24] feat: drawer bulk significance re-rate per chat (T110.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drawer's Significance review panel previously only supported per-memory edits. Adds a bulk control: pick ``level_from`` and ``level_to``, and every memory in the chat at ``level_from`` is moved to ``level_to``. Implementation emits one ``manual_edit`` event per matching memory (not a single bulk event) so the §6.4 per-row audit trail stays intact — each affected memory carries its own ``prior_value -> new_value`` snapshot, so an inverse edit can restore an individual row without needing to inspect a bulk payload's member list. Reuses the existing ``memory_significance`` ``manual_edit`` projector branch (T25), so no state-layer changes are required. The route rejects no-op submissions (``level_from == level_to``) with 400 to avoid padding the event log with empty edits, and clamps both levels to 0..3 (matching ``edit_memory_significance``). UI: a small ``
      `` block in the Significance review section with two number inputs and a submit button. Test: tests/test_drawer_phase4.py::test_bulk_significance_re_rate_emits_manual_edit_per_memory. --- chat/templates/_drawer.html | 19 ++++++++ chat/web/drawer.py | 58 ++++++++++++++++++++++++ tests/test_drawer_phase4.py | 89 +++++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 8cfdd5f..6bbfeeb 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -547,6 +547,25 @@
    {% endif %} + {# T110.4: bulk significance re-rate. Move every memory in this chat + at level_from to level_to with one manual_edit event per row, so + the audit trail stays per-memory. #} +
    + Bulk re-rate significance +
    + + + +
    +
    diff --git a/chat/web/drawer.py b/chat/web/drawer.py index b965e7a..5f94957 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -411,6 +411,64 @@ async def edit_memory_significance( return await drawer(chat_id, request, conn) +@router.post( + "/chats/{chat_id}/drawer/memory/significance/bulk", + response_class=HTMLResponse, +) +async def bulk_re_rate_significance( + chat_id: str, + request: Request, + level_from: int = Form(...), + level_to: int = Form(...), + conn=Depends(get_conn), +): + """T110.4: bulk re-rate every memory in this chat at ``level_from`` + to ``level_to``. + + Fans out into one ``manual_edit`` event per matching memory rather + than a single bulk event so the §6.4 audit trail stays per-row — + each affected memory carries its own ``prior_value -> new_value`` + snapshot, so an inverse edit can restore an individual row without + needing to inspect a bulk payload's member list. The drawer's + significance-distribution panel surfaces the new buckets on the + refreshed partial. + + Both levels are clamped to 0..3 (matching ``edit_memory_significance``) + and a no-op (``level_from == level_to``) is rejected with 400 so a + misclick can't pad the event log with empty edits. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + lf = max(0, min(3, int(level_from))) + lt = max(0, min(3, int(level_to))) + if lf == lt: + raise HTTPException( + status_code=400, + detail=f"level_from and level_to must differ (both = {lf})", + ) + + rows = conn.execute( + "SELECT id FROM memories WHERE chat_id = ? AND significance = ? " + "ORDER BY id ASC", + (chat_id, lf), + ).fetchall() + for row in rows: + memory_id = int(row[0]) + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "memory_significance", + "target_id": memory_id, + "prior_value": lf, + "new_value": lt, + }, + ) + return await drawer(chat_id, request, conn) + + @router.post( "/chats/{chat_id}/drawer/memory/{memory_id}/pin", response_class=HTMLResponse, diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index 20b428e..be4d854 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -523,6 +523,95 @@ def test_delete_impact_modal_escapes_user_controllable_strings(client, tmp_path) assert "<script>alert" in body +def test_bulk_significance_re_rate_emits_manual_edit_per_memory(client, tmp_path): + """T110.4: bulk significance re-rate fans out into one + ``manual_edit`` event per matching memory — preserving the per-row + audit trail (and reversibility) instead of collapsing everything + into a single bulk event. + + Seed five memories at significance 0, bulk re-rate 0 -> 2, and + verify five new ``memory_significance`` ``manual_edit`` rows landed + AND every memory now sits at significance 2. + """ + db = tmp_path / "test.db" + _seed_chat(db) + + # Five memories at significance 0. + with open_db(db) as conn: + for i in range(5): + append_and_apply( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": f"low-sig memory {i}", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "significance": 0, + }, + ) + # Plus one memory at significance 1 to verify the re-rate is + # scoped to ``level_from`` and doesn't sweep the whole chat. + append_and_apply( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "already-rated memory", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "significance": 1, + }, + ) + prior_manual_edits = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'manual_edit'" + ).fetchone()[0] + + response = client.post( + "/chats/chat_bot_a/drawer/memory/significance/bulk", + data={"level_from": "0", "level_to": "2"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + # Five new manual_edit rows, one per matching memory. + new_manual_edits = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'manual_edit'" + ).fetchone()[0] + assert new_manual_edits - prior_manual_edits == 5 + + # Every emitted edit is a memory_significance edit with prior=0 + # and new=2. + import json as _json + + rows = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' " + "ORDER BY id DESC LIMIT 5" + ).fetchall() + for r in rows: + payload = _json.loads(r[0]) + assert payload["target_kind"] == "memory_significance" + assert payload["prior_value"] == 0 + assert payload["new_value"] == 2 + + # Projection caught up — five memories at sig=2, the untouched + # one stays at sig=1, none remain at sig=0. + dist = dict( + conn.execute( + "SELECT significance, COUNT(*) FROM memories " + "WHERE chat_id = 'chat_bot_a' GROUP BY significance" + ).fetchall() + ) + assert dist.get(0, 0) == 0 + assert dist.get(1, 0) == 1 + assert dist.get(2, 0) == 5 + + def test_delete_turn_with_event_id_zero_returns_400(client, tmp_path): """T110.1: ``event_id <= 0`` is an obvious client error and must NOT silently rewind the entire log via ``after_event_id = -1``. The route -- 2.52.0 From fa87ab8c552acf722a98e864f0dfbc0e8c0bcdcc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:30:32 -0400 Subject: [PATCH 12/24] feat: cross-chat search FTS snippet highlighting (T111.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the ``pov_summary`` column in ``search_all_memories``'s SELECT with ``snippet(memories_fts, 0, '', '', '…', 32)`` so each match in a result row is wrapped in ```` for the search-results UI. The original ``pov_summary`` is still returned alongside as a non-highlighted fallback. Template renders ``r.snippet|safe`` — the only HTML in the snippet output is the configured ```` markers, so it is safe to bypass Jinja's auto-escape. --- chat/services/cross_chat_search.py | 30 ++++++++++++++++++++++++------ chat/templates/search.html | 10 +++++++++- chat/web/search.py | 8 ++++++++ tests/test_search_ux.py | 16 ++++++++++++++++ 4 files changed, 57 insertions(+), 7 deletions(-) diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py index cb0403f..2e10f71 100644 --- a/chat/services/cross_chat_search.py +++ b/chat/services/cross_chat_search.py @@ -26,13 +26,19 @@ def search_all_memories( """Search FTS5 across all owners and chats. Returns rows with ``{memory_id, owner_id, chat_id, scene_id, - pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25 - rank ascending (lower rank = stronger match, surfaced first). + pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5 + BM25 rank ascending (lower rank = stronger match, surfaced first). The ``memories`` table has no ``ts`` column; we expose ``created_at`` (the projector-side row insertion timestamp) under that key so the UI does not have to know the storage name. + ``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the + matched ``pov_summary`` column: a windowed excerpt with each match + token wrapped in ``...`` for the search-results UI to + render verbatim. The full ``pov_summary`` is also returned so + non-highlighted callers (or fallbacks) keep the original string. + An empty / whitespace-only ``query`` short-circuits to ``[]`` to avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar "no input yet" state from triggering a full-table scan. @@ -45,9 +51,20 @@ def search_all_memories( # from the content table because the FTS index only stores # ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns # negative scores where lower is better. + # + # ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the + # FTS virtual table, which is ``pov_summary`` (the only column + # indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5( + # pov_summary, ...)`` in migration 0006). SQLite passes the raw + # column text through verbatim aside from inserting the configured + # before/after match markers, so the only HTML in the output is the + # ```` we injected — safe to render with ``|safe`` server-side. rows = conn.execute( "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " - " m.pov_summary, m.significance, m.created_at, " + " m.pov_summary, " + " snippet(memories_fts, 0, '', '', '…', 32) " + " AS snippet, " + " m.significance, m.created_at, " " memories_fts.rank " "FROM memories_fts " "JOIN memories m ON m.id = memories_fts.rowid " @@ -64,9 +81,10 @@ def search_all_memories( "chat_id": r[2], "scene_id": r[3], "pov_summary": r[4], - "significance": r[5], - "ts": r[6], - "fts_rank": r[7], + "snippet": r[5], + "significance": r[6], + "ts": r[7], + "fts_rank": r[8], } for r in rows ] diff --git a/chat/templates/search.html b/chat/templates/search.html index ee61c24..527ee86 100644 --- a/chat/templates/search.html +++ b/chat/templates/search.html @@ -28,7 +28,15 @@ {% if r.chat_name %}· {{ r.chat_name }}{% endif %} {% if r.scene_label %}· scene {{ r.scene_label }}{% endif %}
    -
    {{ r.pov_summary }}
    + {# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with + each match wrapped in ``...``. ``|safe`` is + required so the marker tags survive Jinja's auto-escape; the + snippet is built by SQLite from indexed text, so the only + HTML in the string is the ```` we configured (any + special chars from the source content are passed through as + literal text, NOT as HTML). This is the only ``|safe`` filter + on the page — chat_id, owner_name, etc. remain auto-escaped. #} +
    {{ r.snippet|safe }}
    {% endfor %} diff --git a/chat/web/search.py b/chat/web/search.py index 458c7c7..cf1974a 100644 --- a/chat/web/search.py +++ b/chat/web/search.py @@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)): scene.get("started_at") if scene else None ), "pov_summary": row["pov_summary"], + # T111.1: ``snippet`` is the FTS5 windowed excerpt with + # ```` tags around each match. Falls back to the + # full ``pov_summary`` if the row lacks a snippet (which + # shouldn't happen on this code path because every + # ``raw_results`` row came from a MATCH query, but we + # guard defensively so the template never renders + # ``None``). + "snippet": row.get("snippet") or row["pov_summary"], "significance": row["significance"], "ts": row["ts"], } diff --git a/tests/test_search_ux.py b/tests/test_search_ux.py index 013337b..5afbbb4 100644 --- a/tests/test_search_ux.py +++ b/tests/test_search_ux.py @@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path): assert 'href="/chats/chat_a"' in resp.text +def test_search_results_include_fts_snippet_with_highlight(client, tmp_path): + """T111.1: FTS snippet() wraps each match in ``...`` so + the result row visually highlights the term that matched. + + The seeded ``pov_summary`` is ``the rabbit darted across chat_a``; + SQLite's ``snippet()`` returns the column text with each match token + wrapped — searching for ``rabbit`` yields a snippet containing + ``rabbit``. Assertion is just that the marker appears + (the snippet may be truncated with an ellipsis when the indexed text + runs longer than the configured token window).""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + assert "rabbit" in resp.text + + def test_search_results_use_batched_lookups(client, tmp_path): """T106: hydration must not fan out to per-row ``get_bot``/ ``get_chat``/``get_scene`` calls. -- 2.52.0 From 9987da2c0747a8a7761a6c38353e1659361d85d6 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:42:17 -0400 Subject: [PATCH 13/24] feat: cross-chat search deep-links to turn via memories.event_id (T111.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ``m.event_id`` (T109's nullable column from migration 0014) to ``search_all_memories``'s SELECT, propagate it through the route's template context, and have ``search.html`` build result links as ``/chats/{chat_id}#turn-{event_id}`` — matching the ``id="turn-{event_id}"`` anchor that Phase 3.5 T86 stamps on each turn DOM node so the chat page scrolls to the originating turn on load. Memory rows projected before the 0014 migration ran read NULL ``event_id``; the template falls back to a chat-level link in that case so we never emit ``#turn-None``. Pre-existing tests that asserted on the bare ``href="/chats/{chat_id}"`` contract are updated to assert on the ``href="/chats/{chat_id}#turn-`` prefix to reflect the new deep-link. --- chat/services/cross_chat_search.py | 26 ++++++++++++++++++-------- chat/templates/search.html | 9 ++++++++- chat/web/search.py | 7 +++++++ tests/test_phase4_integration.py | 14 ++++++++------ tests/test_search_ux.py | 30 ++++++++++++++++++++++++++---- 5 files changed, 67 insertions(+), 19 deletions(-) diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py index 2e10f71..d582610 100644 --- a/chat/services/cross_chat_search.py +++ b/chat/services/cross_chat_search.py @@ -26,8 +26,17 @@ def search_all_memories( """Search FTS5 across all owners and chats. Returns rows with ``{memory_id, owner_id, chat_id, scene_id, - pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5 - BM25 rank ascending (lower rank = stronger match, surfaced first). + event_id, pov_summary, snippet, significance, ts, fts_rank}``, + sorted by FTS5 BM25 rank ascending (lower rank = stronger match, + surfaced first). + + ``event_id`` (T111.2 / T109) is the id of the ``event_log`` row that + drove the projecting ``memory_written`` event. May be ``None`` for + memory rows projected before the 0014 schema migration ran (the + column is nullable on purpose; T109 did not backfill historical + rows). The search-results UI uses it to deep-link to the originating + turn anchor (Phase 3.5 T86 stamps ``id="turn-{event_id}"`` on each + turn DOM node) and falls back to a chat-level link when ``None``. The ``memories`` table has no ``ts`` column; we expose ``created_at`` (the projector-side row insertion timestamp) under that key so the @@ -60,7 +69,7 @@ def search_all_memories( # before/after match markers, so the only HTML in the output is the # ```` we injected — safe to render with ``|safe`` server-side. rows = conn.execute( - "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " + "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, m.event_id, " " m.pov_summary, " " snippet(memories_fts, 0, '', '', '…', 32) " " AS snippet, " @@ -80,11 +89,12 @@ def search_all_memories( "owner_id": r[1], "chat_id": r[2], "scene_id": r[3], - "pov_summary": r[4], - "snippet": r[5], - "significance": r[6], - "ts": r[7], - "fts_rank": r[8], + "event_id": r[4], + "pov_summary": r[5], + "snippet": r[6], + "significance": r[7], + "ts": r[8], + "fts_rank": r[9], } for r in rows ] diff --git a/chat/templates/search.html b/chat/templates/search.html index 527ee86..ce0e8c7 100644 --- a/chat/templates/search.html +++ b/chat/templates/search.html @@ -21,7 +21,14 @@