From 77f163608662715ae83618d30846780ac1d460d1 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:22:27 -0400 Subject: [PATCH 01/26] feat: branches table + projector handlers (T89) --- chat/db/migrations/0013_branches.sql | 17 ++++ chat/state/branches.py | 133 +++++++++++++++++++++++++ tests/test_branches_state.py | 141 +++++++++++++++++++++++++++ 3 files changed, 291 insertions(+) create mode 100644 chat/db/migrations/0013_branches.sql create mode 100644 chat/state/branches.py create mode 100644 tests/test_branches_state.py diff --git a/chat/db/migrations/0013_branches.sql b/chat/db/migrations/0013_branches.sql new file mode 100644 index 0000000..53a6bdf --- /dev/null +++ b/chat/db/migrations/0013_branches.sql @@ -0,0 +1,17 @@ +CREATE TABLE branches ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + origin_event_id INTEGER NOT NULL, + head_event_id INTEGER NOT NULL, + chat_id TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + is_active INTEGER NOT NULL DEFAULT 0 +); + +-- Exactly one row may have is_active = 1 at any time. +CREATE UNIQUE INDEX branches_active_idx ON branches(is_active) WHERE is_active = 1; + +-- Bootstrap the main branch. origin_event_id=0 + head_event_id=0 are +-- placeholder seeds; the orchestrator updates head as new events land. +INSERT INTO branches (name, origin_event_id, head_event_id, is_active) +VALUES ('main', 0, 0, 1); diff --git a/chat/state/branches.py b/chat/state/branches.py new file mode 100644 index 0000000..101627e --- /dev/null +++ b/chat/state/branches.py @@ -0,0 +1,133 @@ +"""Branches projector + readers (T89, Phase 4). + +A branch is a named fork of the event log. The 'main' branch is bootstrapped +by migration 0013 with is_active=1. Subsequent branches reference an +origin_event_id (the event they forked from). 
Phase 4 enables creation +and switching; the read-side filter (event readers consulting is_active) +is a Phase 4.5 follow-up — for now branches are metadata-only and the +existing event readers remain branch-agnostic. +""" + +from __future__ import annotations +from sqlite3 import Connection + +from chat.eventlog.projector import on +from chat.eventlog.log import Event + + +@on("branch_created") +def _apply_branch_created(conn: Connection, e: Event) -> None: + """Insert a new branch row with is_active=0. Idempotent via INSERT OR IGNORE.""" + p = e.payload + conn.execute( + "INSERT OR IGNORE INTO branches " + "(name, origin_event_id, head_event_id, chat_id, is_active) " + "VALUES (?, ?, ?, ?, 0)", + ( + p["name"], + int(p["origin_event_id"]), + int(p.get("head_event_id", p["origin_event_id"])), + p.get("chat_id"), + ), + ) + + +@on("branch_switched") +def _apply_branch_switched(conn: Connection, e: Event) -> None: + """Set is_active=1 on the named branch and is_active=0 on all others. + + Atomic via two UPDATEs ordered to avoid the unique-active-index race. + """ + p = e.payload + name = p["name"] + # Clear ALL is_active flags first (avoids the unique-index trip). + conn.execute("UPDATE branches SET is_active = 0 WHERE is_active = 1") + conn.execute( + "UPDATE branches SET is_active = 1 WHERE name = ?", + (name,), + ) + + +@on("branch_head_updated") +def _apply_branch_head_updated(conn: Connection, e: Event) -> None: + """Update head_event_id on the named branch.""" + p = e.payload + conn.execute( + "UPDATE branches SET head_event_id = ? 
WHERE name = ?", + (int(p["head_event_id"]), p["name"]), + ) + + +def get_branch(conn: Connection, name: str) -> dict | None: + row = conn.execute( + "SELECT id, name, origin_event_id, head_event_id, chat_id, " + " created_at, is_active " + "FROM branches WHERE name = ?", + (name,), + ).fetchone() + if not row: + return None + return { + "id": row[0], + "name": row[1], + "origin_event_id": row[2], + "head_event_id": row[3], + "chat_id": row[4], + "created_at": row[5], + "is_active": bool(row[6]), + } + + +def list_branches(conn: Connection, chat_id: str | None = None) -> list[dict]: + if chat_id is None: + rows = conn.execute( + "SELECT id, name, origin_event_id, head_event_id, chat_id, " + " created_at, is_active " + "FROM branches ORDER BY id ASC" + ).fetchall() + else: + rows = conn.execute( + "SELECT id, name, origin_event_id, head_event_id, chat_id, " + " created_at, is_active " + "FROM branches WHERE chat_id = ? OR chat_id IS NULL " + "ORDER BY id ASC", + (chat_id,), + ).fetchall() + return [ + { + "id": r[0], + "name": r[1], + "origin_event_id": r[2], + "head_event_id": r[3], + "chat_id": r[4], + "created_at": r[5], + "is_active": bool(r[6]), + } + for r in rows + ] + + +def active_branch(conn: Connection) -> dict | None: + row = conn.execute( + "SELECT id, name, origin_event_id, head_event_id, chat_id, " + " created_at, is_active " + "FROM branches WHERE is_active = 1" + ).fetchone() + if not row: + return None + return { + "id": row[0], + "name": row[1], + "origin_event_id": row[2], + "head_event_id": row[3], + "chat_id": row[4], + "created_at": row[5], + "is_active": bool(row[6]), + } + + +__all__ = [ + "get_branch", + "list_branches", + "active_branch", +] diff --git a/tests/test_branches_state.py b/tests/test_branches_state.py new file mode 100644 index 0000000..ace2e8e --- /dev/null +++ b/tests/test_branches_state.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +from chat.db.connection import open_db +from chat.db.migrate import 
apply_migrations +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +import chat.state.branches # registers handlers +from chat.state.branches import active_branch, get_branch, list_branches + + +def test_main_branch_bootstrapped_by_migration(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + active = active_branch(conn) + assert active is not None + assert active["name"] == "main" + assert active["is_active"] is True + assert active["origin_event_id"] == 0 + assert active["head_event_id"] == 0 + + +def test_branch_created_inserts_row(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event( + conn, + kind="branch_created", + payload={ + "name": "experiment", + "origin_event_id": 42, + "chat_id": "chat_a", + }, + ) + project(conn) + + b = get_branch(conn, "experiment") + assert b is not None + assert b["name"] == "experiment" + assert b["origin_event_id"] == 42 + # head defaults to origin when not specified + assert b["head_event_id"] == 42 + assert b["chat_id"] == "chat_a" + assert b["is_active"] is False + + # main remains active + active = active_branch(conn) + assert active is not None + assert active["name"] == "main" + + +def test_branch_switched_atomic(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event( + conn, + kind="branch_created", + payload={ + "name": "experiment", + "origin_event_id": 5, + "chat_id": "chat_a", + }, + ) + append_event( + conn, + kind="branch_switched", + payload={"name": "experiment"}, + ) + project(conn) + + active = active_branch(conn) + assert active is not None + assert active["name"] == "experiment" + + main = get_branch(conn, "main") + assert main is not None + assert main["is_active"] is False + + # switch back + append_event( + conn, + kind="branch_switched", + payload={"name": "main"}, + ) + project(conn) + + active2 = active_branch(conn) + assert active2 is 
not None + assert active2["name"] == "main" + + experiment = get_branch(conn, "experiment") + assert experiment is not None + assert experiment["is_active"] is False + + +def test_branch_head_updated_changes_head(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event( + conn, + kind="branch_created", + payload={ + "name": "experiment", + "origin_event_id": 10, + "head_event_id": 10, + "chat_id": "chat_a", + }, + ) + append_event( + conn, + kind="branch_head_updated", + payload={"name": "experiment", "head_event_id": 20}, + ) + project(conn) + + b = get_branch(conn, "experiment") + assert b is not None + assert b["head_event_id"] == 20 + + +def test_list_branches_returns_all(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event( + conn, + kind="branch_created", + payload={ + "name": "experiment", + "origin_event_id": 1, + "chat_id": "chat_a", + }, + ) + project(conn) + + names = [b["name"] for b in list_branches(conn)] + assert "main" in names + assert "experiment" in names -- 2.52.0 From 0ba374b790519f26ce65d34d14490cdaaea0263d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:22:32 -0400 Subject: [PATCH 02/26] feat: embeddings table + projector handlers (pure-Python cosine, T88) --- chat/db/migrations/0012_embeddings.sql | 14 ++ chat/state/embeddings.py | 105 ++++++++++++ tests/test_embeddings_state.py | 218 +++++++++++++++++++++++++ 3 files changed, 337 insertions(+) create mode 100644 chat/db/migrations/0012_embeddings.sql create mode 100644 chat/state/embeddings.py create mode 100644 tests/test_embeddings_state.py diff --git a/chat/db/migrations/0012_embeddings.sql b/chat/db/migrations/0012_embeddings.sql new file mode 100644 index 0000000..23b524e --- /dev/null +++ b/chat/db/migrations/0012_embeddings.sql @@ -0,0 +1,14 @@ +-- Embeddings stored as JSON arrays (pure-Python cosine at query time). 
+-- Phase 4.5+ may swap to sqlite-vec when the host Python supports +-- loadable extensions; the schema is intentionally simple to make that +-- migration straightforward. +CREATE TABLE embeddings ( + memory_id INTEGER PRIMARY KEY, + vector_json TEXT NOT NULL, -- JSON array of floats, length = dim + model TEXT NOT NULL, + dim INTEGER NOT NULL, + indexed_at TEXT NOT NULL DEFAULT (datetime('now')), + FOREIGN KEY (memory_id) REFERENCES memories(id) +); + +CREATE INDEX embeddings_model_idx ON embeddings(model); diff --git a/chat/state/embeddings.py b/chat/state/embeddings.py new file mode 100644 index 0000000..10bd5f9 --- /dev/null +++ b/chat/state/embeddings.py @@ -0,0 +1,105 @@ +"""Embeddings projector + readers (T88, Phase 4). + +Embeddings are stored as JSON-serialized float arrays in a regular +SQLite table. Cosine similarity is computed in Python at query time +(see chat/services/vector_search.py / T92). This deliberately avoids +the sqlite-vec extension dependency — the host Python build doesn't +support enable_load_extension. Phase 4.5+ may revisit if memory counts +grow beyond pure-Python feasibility (~few thousand per query). +""" + +from __future__ import annotations +import json +from sqlite3 import Connection + +from chat.eventlog.projector import on +from chat.eventlog.log import Event + + +@on("embedding_indexed") +def _apply_embedding_indexed(conn: Connection, e: Event) -> None: + """Insert or replace the embedding for a memory. + + Idempotent: re-projection or re-indexing replaces the prior vector. 
+ """ + p = e.payload + vector = p["vector"] + conn.execute( + "INSERT OR REPLACE INTO embeddings " + "(memory_id, vector_json, model, dim, indexed_at) " + "VALUES (?, ?, ?, ?, datetime('now'))", + ( + int(p["memory_id"]), + json.dumps(list(vector)), + p["model"], + int(p.get("dim") or len(vector)), + ), + ) + + +@on("embedding_deindexed") +def _apply_embedding_deindexed(conn: Connection, e: Event) -> None: + """Remove the embedding for a memory (used by reset cascade).""" + p = e.payload + conn.execute( + "DELETE FROM embeddings WHERE memory_id = ?", + (int(p["memory_id"]),), + ) + + +def get_embedding(conn: Connection, memory_id: int) -> dict | None: + row = conn.execute( + "SELECT memory_id, vector_json, model, dim, indexed_at " + "FROM embeddings WHERE memory_id = ?", + (memory_id,), + ).fetchone() + if not row: + return None + return { + "memory_id": row[0], + "vector": json.loads(row[1]), + "model": row[2], + "dim": row[3], + "indexed_at": row[4], + } + + +def list_embeddings_for_owner(conn: Connection, owner_id: str) -> list[dict]: + """Return all embeddings for memories owned by ``owner_id``. + + Used by vector search at query time (T92). The join carries the + fields the cosine ranker needs to assemble result rows without a + second round-trip: the POV summary text, significance, and witness + flags. The ``memories`` table has no separate ``text`` column — + ``pov_summary`` is the canonical narrative text per + ``chat/services/memory_write.py``. 
+ """ + rows = conn.execute( + "SELECT e.memory_id, e.vector_json, e.model, e.dim, " + " m.pov_summary, m.significance, " + " m.witness_you, m.witness_host, m.witness_guest " + "FROM embeddings e " + "JOIN memories m ON m.id = e.memory_id " + "WHERE m.owner_id = ?", + (owner_id,), + ).fetchall() + return [ + { + "memory_id": r[0], + "vector": json.loads(r[1]), + "model": r[2], + "dim": r[3], + "pov_summary": r[4], + "significance": r[5], + "witness_you": r[6], + "witness_host": r[7], + "witness_guest": r[8], + } + for r in rows + ] + + +__all__ = [ + "get_embedding", + "list_embeddings_for_owner", +] diff --git a/tests/test_embeddings_state.py b/tests/test_embeddings_state.py new file mode 100644 index 0000000..a28d244 --- /dev/null +++ b/tests/test_embeddings_state.py @@ -0,0 +1,218 @@ +from __future__ import annotations + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +import chat.state.memory # registers memory_written handler +import chat.state.embeddings # registers embedding handlers +from chat.state.embeddings import get_embedding, list_embeddings_for_owner + + +def _base_memory(**overrides): + payload = { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "scene_id": 1, + "pov_summary": "She laughed at his joke about owls.", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "chat_clock_at": "2026-04-26T10:00:00", + "source": "direct", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + } + payload.update(overrides) + return payload + + +def _vec(n: int = 384, base: float = 0.1) -> list[float]: + """Return a length-n float vector with predictable values for assertions.""" + return [round(base + i * 0.001, 6) for i in range(n)] + + +def test_embedding_indexed_inserts_row(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event(conn, 
kind="memory_written", payload=_base_memory()) + project(conn) + memory_id = conn.execute("SELECT id FROM memories").fetchone()[0] + + vector = _vec(384, base=0.1) + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": memory_id, + "vector": vector, + "model": "test-model", + "dim": 384, + }, + ) + project(conn) + + emb = get_embedding(conn, memory_id) + assert emb is not None + assert emb["memory_id"] == memory_id + assert emb["vector"] == vector + assert emb["model"] == "test-model" + assert emb["dim"] == 384 + assert emb["indexed_at"] is not None + + +def test_embedding_deindexed_removes_row(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event(conn, kind="memory_written", payload=_base_memory()) + project(conn) + memory_id = conn.execute("SELECT id FROM memories").fetchone()[0] + + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": memory_id, + "vector": _vec(), + "model": "test-model", + "dim": 384, + }, + ) + project(conn) + assert get_embedding(conn, memory_id) is not None + + append_event( + conn, + kind="embedding_deindexed", + payload={"memory_id": memory_id}, + ) + project(conn) + assert get_embedding(conn, memory_id) is None + + +def test_embedding_indexed_replaces_existing(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + append_event(conn, kind="memory_written", payload=_base_memory()) + project(conn) + memory_id = conn.execute("SELECT id FROM memories").fetchone()[0] + + vec_a = _vec(384, base=0.1) + vec_b = _vec(384, base=0.5) + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": memory_id, + "vector": vec_a, + "model": "test-model", + "dim": 384, + }, + ) + project(conn) + first = get_embedding(conn, memory_id) + assert first is not None + assert first["vector"] == vec_a + + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": memory_id, + "vector": vec_b, + 
"model": "test-model", + "dim": 384, + }, + ) + project(conn) + second = get_embedding(conn, memory_id) + assert second is not None + assert second["vector"] == vec_b + # Still exactly one row for this memory. + count = conn.execute( + "SELECT COUNT(*) FROM embeddings WHERE memory_id = ?", (memory_id,) + ).fetchone()[0] + assert count == 1 + + +def test_list_embeddings_for_owner_returns_joined_rows(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + # Two memories for bot_a, one for bot_b. + append_event( + conn, + kind="memory_written", + payload=_base_memory( + owner_id="bot_a", + pov_summary="Alpha memory.", + significance=2, + ), + ) + append_event( + conn, + kind="memory_written", + payload=_base_memory( + owner_id="bot_a", + pov_summary="Beta memory.", + significance=3, + ), + ) + append_event( + conn, + kind="memory_written", + payload=_base_memory( + owner_id="bot_b", + pov_summary="Gamma memory.", + significance=1, + ), + ) + project(conn) + + rows = conn.execute( + "SELECT id, owner_id FROM memories ORDER BY id" + ).fetchall() + # Index every memory with a distinct vector so we can check ordering. 
+ for i, (mid, _owner) in enumerate(rows): + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": mid, + "vector": _vec(384, base=0.1 * (i + 1)), + "model": "test-model", + "dim": 384, + }, + ) + project(conn) + + a_rows = list_embeddings_for_owner(conn, "bot_a") + assert len(a_rows) == 2 + summaries = {r["pov_summary"] for r in a_rows} + assert summaries == {"Alpha memory.", "Beta memory."} + sigs = {r["significance"] for r in a_rows} + assert sigs == {2, 3} + for r in a_rows: + assert r["model"] == "test-model" + assert r["dim"] == 384 + assert isinstance(r["vector"], list) + assert len(r["vector"]) == 384 + assert r["witness_you"] == 1 + assert r["witness_host"] == 1 + assert r["witness_guest"] == 0 + + b_rows = list_embeddings_for_owner(conn, "bot_b") + assert len(b_rows) == 1 + assert b_rows[0]["pov_summary"] == "Gamma memory." + + +def test_get_embedding_returns_none_when_missing(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + assert get_embedding(conn, 999) is None -- 2.52.0 From c06a32767b7303c86f299740af6cf5dd7d9d905a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:23:15 -0400 Subject: [PATCH 03/26] perf: read_recent_dialogue pushes chat-id filter into SQL (T90.1) The previous implementation pulled the last N rows in SQL across all chats and dropped foreign-chat rows in Python. With LIMIT N this could return far fewer than N relevant rows when other chats had recent activity. Push the chat_id filter into SQL via json_extract so LIMIT N always returns N rows scoped to the requested chat. Test: seeds two chats with 60 turns each interleaved; queries chat_a with limit=50; asserts exactly 50 chat_a rows returned (was 0 prior to the fix because chat_b's rows dominated the global tail). 
--- chat/services/turn_common.py | 14 +++++-- tests/test_turn_common.py | 76 ++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/chat/services/turn_common.py b/chat/services/turn_common.py index e246314..3c63420 100644 --- a/chat/services/turn_common.py +++ b/chat/services/turn_common.py @@ -54,14 +54,21 @@ def read_recent_dialogue( regenerate to drop the original assistant_turn from its prompt context window before that row has been marked superseded (the supersede UPDATE lands at the end so the new event_id is known). + + T90.1: the chat_id filter is pushed into SQL via ``json_extract`` so + ``LIMIT N`` always returns N rows scoped to the requested chat. The + previous implementation filtered chat_id post-fetch in Python, which + let foreign-chat rows fill the LIMIT and yield fewer than N relevant + rows in busy multi-chat databases. """ if exclude_event_id is None: cur = conn.execute( "SELECT id, kind, payload_json FROM event_log " "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') " " AND superseded_by IS NULL AND hidden = 0 " + " AND json_extract(payload_json, '$.chat_id') = ? " "ORDER BY id DESC LIMIT ?", - (limit,), + (chat_id, limit), ) else: cur = conn.execute( @@ -69,15 +76,14 @@ def read_recent_dialogue( "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') " " AND id != ? " " AND superseded_by IS NULL AND hidden = 0 " + " AND json_extract(payload_json, '$.chat_id') = ? 
" "ORDER BY id DESC LIMIT ?", - (exclude_event_id, limit), + (exclude_event_id, chat_id, limit), ) rows = list(reversed(cur.fetchall())) out: list[dict] = [] for row_id, kind, payload_json in rows: p = json.loads(payload_json) - if p.get("chat_id") != chat_id: - continue if kind in ("user_turn", "user_turn_edit"): out.append( { diff --git a/tests/test_turn_common.py b/tests/test_turn_common.py index 4788fde..3bfc8ff 100644 --- a/tests/test_turn_common.py +++ b/tests/test_turn_common.py @@ -186,6 +186,82 @@ def test_read_recent_dialogue_filters_superseded_and_other_chats(tmp_path): assert ut_id is not None +def test_read_recent_dialogue_limit_respects_chat_scope(tmp_path): + """T90.1: ``read_recent_dialogue`` must push the chat_id filter into + SQL so that ``LIMIT N`` returns N rows scoped to the requested chat — + not N globally-recent rows that may then be filtered down to fewer in + Python. + + Setup: two chats with 60 turns each, interleaved. With the old + post-fetch filter, ``LIMIT 50`` would pull 50 globally-recent rows + (most or all from chat_b — the most recent inserts) and then drop + chat_b ones via the Python check, yielding far fewer than 50 chat_a + rows. After the SQL pushdown, ``LIMIT 50`` should return exactly 50 + chat_a rows. + """ + db = tmp_path / "test.db" + apply_migrations(db) + with open_db(db) as conn: + for chat_id, host_bot in (("chat_a", "bot_a"), ("chat_b", "bot_b")): + append_event( + conn, + kind="bot_authored", + payload={ + "id": host_bot, + "name": host_bot, + "persona": "...", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": chat_id, + "host_bot_id": host_bot, + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + # Interleave 60 user_turn rows in each chat — chat_b's go in last + # so they dominate the global tail. 
+ for i in range(60): + append_event( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_a", + "prose": f"a-{i}", + "segments": [], + }, + ) + for i in range(60): + append_event( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_b", + "prose": f"b-{i}", + "segments": [], + }, + ) + project(conn) + + out = read_recent_dialogue(conn, "chat_a", limit=50) + + # All returned rows should belong to chat_a (texts a-* only). + assert len(out) == 50 + for entry in out: + assert entry["text"].startswith("a-"), ( + f"foreign chat row leaked: {entry!r}" + ) + + def test_gather_prior_edges_fills_missing_with_default(tmp_path): """``gather_prior_edges`` returns one entry per directed pair across ``present_ids``. Missing rows fall back to the schema default -- 2.52.0 From cc71fb4d0145de61dd405f8431554efffe8c3568 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:23:55 -0400 Subject: [PATCH 04/26] chore: clarify regenerate lifecycle warning wording (T90.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The warning said "lifecycle transitions from superseded turn ARE NOT being rolled back". When regenerating an OLDER turn, the listed transitions can include intervening-turn ones that legitimately stand on their own — they weren't authored by the superseded turn itself. Reword to "lifecycle transitions at-or-after turn " so operators reading logs aren't misled into thinking every listed event id was emitted by the target turn. Cosmetic change to a single log message. Test: extends test_regenerate_with_prior_lifecycle_logs_warning to assert the new phrasing is present and the old phrasing is gone. 
--- chat/services/regenerate.py | 8 ++++++-- tests/test_regenerate.py | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/chat/services/regenerate.py b/chat/services/regenerate.py index b2aba9a..0678a76 100644 --- a/chat/services/regenerate.py +++ b/chat/services/regenerate.py @@ -182,9 +182,13 @@ async def regenerate_assistant_turn( (chat_id, original_assistant_event_id), ).fetchall() if unrolled_lifecycle: + # T90.2: phrased as "at-or-after turn " rather than "from + # superseded turn" because regenerating an OLDER turn lists + # intervening-turn transitions that legitimately stand on their + # own — those weren't authored by the superseded turn itself. _log.warning( - "regenerate_assistant_turn: %d lifecycle transition(s) from " - "superseded turn %s are NOT being rolled back (Phase 4 " + "regenerate_assistant_turn: %d lifecycle transition(s) " + "at-or-after turn %s are NOT being rolled back (Phase 4 " "follow-up). Affected event ids: %s", len(unrolled_lifecycle), original_assistant_event_id, diff --git a/tests/test_regenerate.py b/tests/test_regenerate.py index d8a2d65..b6d5e92 100644 --- a/tests/test_regenerate.py +++ b/tests/test_regenerate.py @@ -757,6 +757,13 @@ def test_regenerate_with_prior_lifecycle_logs_warning(tmp_path, monkeypatch, cap # row's id. assert str(at_id) in msg assert str(completed_id) in msg + # T90.2: wording was tightened from "from superseded turn" to + # "at-or-after turn " — when regenerating an OLDER turn, the + # listed transitions may include legitimate intervening-turn ones + # that stand on their own. The new phrasing avoids implying the + # warning's target turn directly authored every listed transition. 
+ assert "at-or-after turn" in msg + assert "from superseded turn" not in msg def test_regenerate_sibling_lookup_scoped_to_chat(tmp_path, monkeypatch): -- 2.52.0 From 0d76a6b2d698a8808a79e6669b76a6d02dec8c4b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:25:07 -0400 Subject: [PATCH 05/26] refactor: consolidate legacy record_turn_memory into unified API (T90.3) The Phase 1 single-bot ``record_turn_memory`` lingered next to the unified ``record_turn_memory_for_present`` introduced in T84. Only test fixtures still called the legacy entry point. - Remove ``record_turn_memory`` from ``chat/services/memory_write.py``. - Update the two test_memory_write.py callers to use ``record_turn_memory_for_present(..., guest_bot_id=None)``, which produces the same ``[you=1, host=1, guest=0]`` witness mask. The unified API returns ``dict[bot_id, (event_id, memory_id)]``; tests extract the host entry. No production callers were affected. --- chat/services/memory_write.py | 56 ----------------------------------- tests/test_memory_write.py | 14 +++++++-- 2 files changed, 11 insertions(+), 59 deletions(-) diff --git a/chat/services/memory_write.py b/chat/services/memory_write.py index 12eed5d..d60c3d9 100644 --- a/chat/services/memory_write.py +++ b/chat/services/memory_write.py @@ -22,62 +22,6 @@ from sqlite3 import Connection from chat.eventlog.log import append_and_apply -def record_turn_memory( - conn: Connection, - *, - chat_id: str, - host_bot_id: str, - narrative_text: str, - scene_id: int | None = None, - chat_clock_at: str | None = None, - source: str = "direct", - significance: int = 1, -) -> tuple[int, int | None]: - """Append a ``memory_written`` event for the host bot's POV of this turn. - - Uses :func:`chat.eventlog.log.append_and_apply` (not raw - :func:`append_event`) so the new memory row is projected immediately - without re-running prior non-idempotent handlers (e.g. ``edge_update`` - deltas). - - Returns ``(event_id, memory_id)``. 
``event_id`` is the row id of the - just-appended ``memory_written`` event in ``event_log``. ``memory_id`` - is the autoincrement PK of the corresponding ``memories`` row — these - are *different* numbers (event_log and memories use independent - rowid sequences) so callers needing to update significance or pin - state must use ``memory_id``. Falls back to ``None`` if the projected - row can't be located, which shouldn't happen but keeps the return - shape stable. - """ - payload: dict = { - "owner_id": host_bot_id, - "chat_id": chat_id, - "pov_summary": narrative_text, - "witness_you": 1, - "witness_host": 1, - "witness_guest": 0, - "source": source, - "reliability": 1.0, - "significance": significance, - "pinned": 0, - "auto_pinned": 0, - } - if scene_id is not None: - payload["scene_id"] = scene_id - if chat_clock_at is not None: - payload["chat_clock_at"] = chat_clock_at - - event_id = append_and_apply(conn, kind="memory_written", payload=payload) - row = conn.execute( - "SELECT id FROM memories " - "WHERE owner_id = ? AND chat_id = ? 
" - "ORDER BY id DESC LIMIT 1", - (host_bot_id, chat_id), - ).fetchone() - memory_id = row[0] if row else None - return event_id, memory_id - - def _write_one_memory( conn: Connection, *, diff --git a/tests/test_memory_write.py b/tests/test_memory_write.py index 77132ae..8c5253a 100644 --- a/tests/test_memory_write.py +++ b/tests/test_memory_write.py @@ -22,7 +22,7 @@ from chat.db.migrate import apply_migrations from chat.eventlog.log import append_event from chat.eventlog.projector import project from chat.llm.mock import MockLLMClient -from chat.services.memory_write import record_turn_memory, record_turn_memory_for_present +from chat.services.memory_write import record_turn_memory_for_present import chat.state.entities # noqa: F401 - register handlers import chat.state.memory # noqa: F401 import chat.state.world # noqa: F401 @@ -64,14 +64,19 @@ def test_record_turn_memory_writes_event_and_projects(tmp_path): apply_migrations(db) _seed_minimal(db) with open_db(db) as conn: - eid, mid = record_turn_memory( + # T90.3: legacy ``record_turn_memory`` was removed; the unified + # ``record_turn_memory_for_present`` with ``guest_bot_id=None`` + # produces the same single-bot witness mask [1,1,0]. + result = record_turn_memory_for_present( conn, chat_id="chat_bot_a", host_bot_id="bot_a", + guest_bot_id=None, narrative_text="BotA looks up. 'You're back late.'", scene_id=None, chat_clock_at="2026-04-26T20:00:00+00:00", ) + eid, mid = result["bot_a"] assert eid > 0 assert mid is not None and mid > 0 @@ -111,12 +116,15 @@ def test_record_turn_memory_omits_optional_fields(tmp_path): _seed_minimal(db) with open_db(db) as conn: # Call without scene_id/chat_clock_at — should default to None. - eid, mid = record_turn_memory( + # T90.3: migrated from legacy ``record_turn_memory``. 
+ result = record_turn_memory_for_present( conn, chat_id="chat_bot_a", host_bot_id="bot_a", + guest_bot_id=None, narrative_text="A simple memory.", ) + eid, mid = result["bot_a"] assert eid > 0 assert mid is not None and mid > 0 -- 2.52.0 From 1d6768e98077e4fba398730dc61fb07211f1a10f Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:28:11 -0400 Subject: [PATCH 06/26] test: bump schema_version assertion to 13 (0012 embeddings + 0013 branches) --- tests/test_world.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_world.py b/tests/test_world.py index 6934e12..688b38f 100644 --- a/tests/test_world.py +++ b/tests/test_world.py @@ -324,11 +324,11 @@ def test_get_scene_returns_none_for_missing(tmp_path): assert active_scene(conn, "chat_missing") is None -def test_schema_version_after_migration_is_11(tmp_path): +def test_schema_version_after_migration_is_13(tmp_path): db = tmp_path / "t.db" apply_migrations(db) with open_db(db) as conn: row = conn.execute( "SELECT value FROM meta WHERE key = 'schema_version'" ).fetchone() - assert int(row[0]) == 11 + assert int(row[0]) == 13 -- 2.52.0 From c7cb0eb01e32d7d8dcc90df17a341e536351a4ab Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:31:06 -0400 Subject: [PATCH 07/26] feat: pure-Python cosine vector search service (T92) --- chat/services/vector_search.py | 79 +++++++++++ tests/test_vector_search.py | 242 +++++++++++++++++++++++++++++++++ 2 files changed, 321 insertions(+) create mode 100644 chat/services/vector_search.py create mode 100644 tests/test_vector_search.py diff --git a/chat/services/vector_search.py b/chat/services/vector_search.py new file mode 100644 index 0000000..60b179d --- /dev/null +++ b/chat/services/vector_search.py @@ -0,0 +1,79 @@ +"""Vector search service (T92, Phase 4). + +Pure-Python cosine similarity over the embeddings table. 
Phase 4 ships +this without sqlite-vec because the host Python build doesn't support +loadable extensions. For single-user scale (< few thousand memories +per owner), iterating in Python is sub-millisecond. + +Phase 4.5+ may swap to sqlite-vec when the host Python supports +enable_load_extension; the public API stays stable. +""" + +from __future__ import annotations +import math +from sqlite3 import Connection + +from chat.state.embeddings import list_embeddings_for_owner + + +_VALID_WITNESS_ROLES = {"you", "host", "guest"} + + +def _cosine_similarity(a: list[float], b: list[float]) -> float: + """Cosine similarity. Assumes both vectors are non-zero.""" + if len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b)) + norm_a = math.sqrt(sum(x * x for x in a)) or 1.0 + norm_b = math.sqrt(sum(x * x for x in b)) or 1.0 + return dot / (norm_a * norm_b) + + +def vector_search( + conn: Connection, + *, + owner_id: str, + witness_role: str, # "you" | "host" | "guest" + query_vector: list[float], + k: int = 4, +) -> list[dict]: + """Return top-K memories by cosine similarity to query_vector, + witness-filtered for the viewer's POV. Returns rows with + {memory_id, pov_summary, significance, score} sorted by score + DESC. Empty list if no embeddings indexed for this owner. + """ + if witness_role not in _VALID_WITNESS_ROLES: + raise ValueError( + f"witness_role must be one of {_VALID_WITNESS_ROLES}, got {witness_role!r}" + ) + + rows = list_embeddings_for_owner(conn, owner_id) + if not rows: + return [] + + # Witness-filter by the requesting role. 
+ witness_key = f"witness_{witness_role}" + filtered = [r for r in rows if r.get(witness_key) == 1] + if not filtered: + return [] + + scored: list[tuple[float, dict]] = [] + for row in filtered: + score = _cosine_similarity(query_vector, row["vector"]) + scored.append( + ( + score, + { + "memory_id": row["memory_id"], + "pov_summary": row["pov_summary"], + "significance": row["significance"], + "score": score, + }, + ) + ) + + scored.sort(key=lambda t: t[0], reverse=True) + return [item for _, item in scored[:k]] + + +__all__ = ["vector_search"] diff --git a/tests/test_vector_search.py b/tests/test_vector_search.py new file mode 100644 index 0000000..7801e80 --- /dev/null +++ b/tests/test_vector_search.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import pytest + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +import chat.state.memory # registers memory_written handler +import chat.state.embeddings # registers embedding handlers +from chat.services.vector_search import vector_search + + +def _base_memory(**overrides): + payload = { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "scene_id": 1, + "pov_summary": "She laughed at his joke about owls.", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "chat_clock_at": "2026-04-26T10:00:00", + "source": "direct", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + } + payload.update(overrides) + return payload + + +def _one_hot(dim: int, idx: int) -> list[float]: + """Return a one-hot vector of length ``dim`` with 1.0 at ``idx``.""" + v = [0.0] * dim + v[idx] = 1.0 + return v + + +def _seed_memory_with_embedding( + conn, + *, + owner_id: str, + pov_summary: str, + vector: list[float], + significance: int = 1, + witness_you: int = 1, + witness_host: int = 1, + witness_guest: int = 0, + model: str = "test-model", +) -> int: + 
append_event( + conn, + kind="memory_written", + payload=_base_memory( + owner_id=owner_id, + pov_summary=pov_summary, + significance=significance, + witness_you=witness_you, + witness_host=witness_host, + witness_guest=witness_guest, + ), + ) + project(conn) + memory_id = conn.execute( + "SELECT id FROM memories WHERE pov_summary = ?", (pov_summary,) + ).fetchone()[0] + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": memory_id, + "vector": vector, + "model": model, + "dim": len(vector), + }, + ) + project(conn) + return memory_id + + +def test_vector_search_returns_nearest_neighbors(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + dim = 8 + ids = [] + for i in range(5): + mid = _seed_memory_with_embedding( + conn, + owner_id="bot_a", + pov_summary=f"Memory {i}.", + vector=_one_hot(dim, i), + ) + ids.append(mid) + + # Query close to memory index 3 (one-hot at position 3, plus tiny noise). + query = _one_hot(dim, 3) + query[2] = 0.01 + + results = vector_search( + conn, + owner_id="bot_a", + witness_role="you", + query_vector=query, + k=3, + ) + assert len(results) == 3 + # Top-1 must be memory at index 3. + assert results[0]["memory_id"] == ids[3] + assert results[0]["pov_summary"] == "Memory 3." + # Score for the near-perfect match should be very close to 1.0. + assert results[0]["score"] > 0.99 + # Results sorted by score DESC. + scores = [r["score"] for r in results] + assert scores == sorted(scores, reverse=True) + # Second place should be memory index 2 (the small noise component). + assert results[1]["memory_id"] == ids[2] + + +def test_vector_search_respects_witness_filter(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + dim = 4 + # Memory visible to you=1, host=1, guest=0. 
+ _seed_memory_with_embedding( + conn, + owner_id="bot_a", + pov_summary="Restricted.", + vector=_one_hot(dim, 0), + witness_you=1, + witness_host=1, + witness_guest=0, + ) + + # Guest sees nothing. + guest_results = vector_search( + conn, + owner_id="bot_a", + witness_role="guest", + query_vector=_one_hot(dim, 0), + k=4, + ) + assert guest_results == [] + + # Host sees the memory. + host_results = vector_search( + conn, + owner_id="bot_a", + witness_role="host", + query_vector=_one_hot(dim, 0), + k=4, + ) + assert len(host_results) == 1 + assert host_results[0]["pov_summary"] == "Restricted." + + # You also see it. + you_results = vector_search( + conn, + owner_id="bot_a", + witness_role="you", + query_vector=_one_hot(dim, 0), + k=4, + ) + assert len(you_results) == 1 + + +def test_vector_search_respects_owner_filter(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + dim = 4 + _seed_memory_with_embedding( + conn, + owner_id="bot_a", + pov_summary="Owner A memory.", + vector=_one_hot(dim, 0), + ) + _seed_memory_with_embedding( + conn, + owner_id="bot_b", + pov_summary="Owner B memory.", + vector=_one_hot(dim, 0), + ) + + a_results = vector_search( + conn, + owner_id="bot_a", + witness_role="you", + query_vector=_one_hot(dim, 0), + k=10, + ) + assert len(a_results) == 1 + assert a_results[0]["pov_summary"] == "Owner A memory." + + b_results = vector_search( + conn, + owner_id="bot_b", + witness_role="you", + query_vector=_one_hot(dim, 0), + k=10, + ) + assert len(b_results) == 1 + assert b_results[0]["pov_summary"] == "Owner B memory." 
+ + +def test_vector_search_invalid_witness_role_raises(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + with pytest.raises(ValueError, match="witness_role"): + vector_search( + conn, + owner_id="bot_a", + witness_role="invalid", + query_vector=[1.0, 0.0, 0.0], + k=4, + ) + + +def test_vector_search_empty_when_no_embeddings_indexed(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + # Seed a memory but don't index an embedding for it. + append_event( + conn, + kind="memory_written", + payload=_base_memory(owner_id="bot_a", pov_summary="No embedding here."), + ) + project(conn) + + results = vector_search( + conn, + owner_id="bot_a", + witness_role="you", + query_vector=[1.0, 0.0, 0.0, 0.0], + k=4, + ) + assert results == [] -- 2.52.0 From caa17b417422664017b64d36841560b7fbf684b3 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:31:07 -0400 Subject: [PATCH 08/26] feat: embedding generation service (Phase 4 pseudo-embedding) (T91) --- chat/services/embeddings.py | 108 ++++++++++++++++++++++++++++++++++++ tests/test_embeddings.py | 91 ++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) create mode 100644 chat/services/embeddings.py create mode 100644 tests/test_embeddings.py diff --git a/chat/services/embeddings.py b/chat/services/embeddings.py new file mode 100644 index 0000000..ece6eae --- /dev/null +++ b/chat/services/embeddings.py @@ -0,0 +1,108 @@ +"""Embedding generation service (T91, Phase 4). + +Wraps the embedding API call. For Phase 4's first cut we ship a +deterministic local pseudo-embedding (hash-derived) so the vector +retrieval pipeline can land without an external embedding endpoint +or heavy local dependency. Phase 4.5+ swaps to a real model — the +EmbeddingResult shape stays the same, only the generator changes. 
+""" + +from __future__ import annotations + +import hashlib +import math +import struct + +from pydantic import BaseModel + +from chat.llm.client import LLMClient + + +DEFAULT_EMBEDDING_DIM = 384 +DEFAULT_EMBEDDING_MODEL = "pseudo-sha256-384" +FALLBACK_EMBEDDING_MODEL = "fallback" + + +class EmbeddingResult(BaseModel): + vector: list[float] + model: str + dim: int + + +def _pseudo_embed(text: str, dim: int = DEFAULT_EMBEDDING_DIM) -> list[float]: + """Deterministic pseudo-embedding for Phase 4 first cut. + + Hashes the text with SHA-256, then expands by re-hashing each + successive block with the previous block + a counter — this gives + ``dim * 4`` bytes of fresh entropy per input rather than naively + repeating the 32-byte digest (which would collapse the vector onto + only 8 unique floats and make distinct inputs cosine-similar). + + Bytes are unpacked as little-endian int32s and rescaled to [-1, 1] + so we sidestep the float32 NaN/denormal values that ``struct.unpack + 'f'`` would otherwise produce on raw hash bytes. The result is + unit-normalized so cosine similarity reduces to a dot product. + + NOT semantically meaningful — just consistent for testing the + pipeline. Phase 4.5 should swap to a real embedding model. + """ + needed = dim * 4 # 4 bytes per int32 + seed = text.encode("utf-8") + chunks: list[bytes] = [] + counter = 0 + while sum(len(c) for c in chunks) < needed: + block = hashlib.sha256(seed + counter.to_bytes(4, "big")).digest() + chunks.append(block) + counter += 1 + full = b"".join(chunks)[:needed] + ints = struct.unpack(f"<{dim}i", full) + # Map int32 to roughly [-1, 1] — exact bound doesn't matter since we + # normalize, but keeps values numerically tame. 
+ raw = [x / 2147483648.0 for x in ints] + norm = math.sqrt(sum(x * x for x in raw)) or 1.0 + return [x / norm for x in raw] + + +async def generate_embedding( + client: LLMClient, + *, + text: str, + model: str = DEFAULT_EMBEDDING_MODEL, + dim: int = DEFAULT_EMBEDDING_DIM, + timeout_s: float = 30.0, +) -> EmbeddingResult: + """Generate an embedding for the given text. + + Phase 4 default uses a deterministic local pseudo-embedding. If + the LLMClient grows an ``embed(...)`` method in Phase 4.5, this + wrapper will route to it when ``model != "pseudo-sha256-384"``. + + Falls back to a zero vector with ``model="fallback"`` on any + failure (callers detect the sentinel and skip indexing). For the + pseudo path, failure is structurally impossible — it's pure local + computation. + """ + if not text or not text.strip(): + # Empty input — return fallback so caller doesn't index empty rows. + return EmbeddingResult( + vector=[0.0] * dim, model=FALLBACK_EMBEDDING_MODEL, dim=dim + ) + + if model == DEFAULT_EMBEDDING_MODEL: + # Pure-local pseudo path — no LLMClient call. + return EmbeddingResult(vector=_pseudo_embed(text, dim), model=model, dim=dim) + + # Future: real embedding via client.embed(...). Phase 4.5 work. + # For Phase 4, any non-default model falls through to fallback. + return EmbeddingResult( + vector=[0.0] * dim, model=FALLBACK_EMBEDDING_MODEL, dim=dim + ) + + +__all__ = [ + "DEFAULT_EMBEDDING_DIM", + "DEFAULT_EMBEDDING_MODEL", + "FALLBACK_EMBEDDING_MODEL", + "EmbeddingResult", + "generate_embedding", +] diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py new file mode 100644 index 0000000..b458681 --- /dev/null +++ b/tests/test_embeddings.py @@ -0,0 +1,91 @@ +"""Tests for the embedding generation service (T91, Phase 4). + +Phase 4's first cut ships a deterministic local pseudo-embedding so the +vector retrieval pipeline can land without an external embeddings API +or a heavy local model dependency. 
These tests pin the contract: + +* the result has the right shape (vector length, ``dim`` metadata), +* the default ``model`` string is reported back unchanged, +* output is byte-identical for the same input (deterministic), +* distinct inputs produce distinct vectors (so cosine actually + discriminates), +* empty / whitespace-only input collapses to the ``"fallback"`` sentinel + with a zero vector — callers detect this and skip indexing, +* the vector is unit-normalized so cosine similarity behaves. + +The pseudo path doesn't touch the LLMClient, so we pass an empty +``MockLLMClient`` — any accidental call into it would raise +``IndexError`` and surface as a regression. +""" + +from __future__ import annotations + +import math + +import pytest + +from chat.llm.mock import MockLLMClient +from chat.services.embeddings import ( + DEFAULT_EMBEDDING_DIM, + DEFAULT_EMBEDDING_MODEL, + FALLBACK_EMBEDDING_MODEL, + EmbeddingResult, + generate_embedding, +) + + +def _client() -> MockLLMClient: + # Pseudo path never calls the client — empty canned list ensures any + # accidental call raises and surfaces the regression loudly. 
+ return MockLLMClient(canned=[]) + + +@pytest.mark.asyncio +async def test_generate_embedding_returns_vector_of_correct_dim(): + result = await generate_embedding(_client(), text="hello") + assert isinstance(result, EmbeddingResult) + assert isinstance(result.vector, list) + assert len(result.vector) == DEFAULT_EMBEDDING_DIM == 384 + assert result.dim == 384 + assert all(isinstance(x, float) for x in result.vector) + + +@pytest.mark.asyncio +async def test_generate_embedding_returns_correct_model_metadata(): + result = await generate_embedding(_client(), text="hello") + assert result.model == DEFAULT_EMBEDDING_MODEL == "pseudo-sha256-384" + + +@pytest.mark.asyncio +async def test_generate_embedding_is_deterministic(): + a = await generate_embedding(_client(), text="hello world") + b = await generate_embedding(_client(), text="hello world") + assert a.vector == b.vector + + +@pytest.mark.asyncio +async def test_generate_embedding_distinct_text_produces_distinct_vectors(): + a = await generate_embedding(_client(), text="hello world") + b = await generate_embedding(_client(), text="totally different content") + assert a.vector != b.vector + # Sanity-check cosine similarity — both vectors are unit-normalized, + # so this reduces to a plain dot product. 
+ cosine = sum(x * y for x, y in zip(a.vector, b.vector)) + assert cosine < 0.99 + + +@pytest.mark.asyncio +async def test_generate_embedding_empty_text_returns_fallback(): + for empty in ("", " ", "\n\t"): + result = await generate_embedding(_client(), text=empty) + assert result.model == FALLBACK_EMBEDDING_MODEL == "fallback" + assert result.dim == DEFAULT_EMBEDDING_DIM + assert len(result.vector) == DEFAULT_EMBEDDING_DIM + assert all(x == 0.0 for x in result.vector) + + +@pytest.mark.asyncio +async def test_generate_embedding_unit_normalized(): + result = await generate_embedding(_client(), text="some non-empty text") + norm_sq = sum(x * x for x in result.vector) + assert math.isclose(norm_sq, 1.0, abs_tol=1e-6) -- 2.52.0 From 8f66e1123abffc208294d3ac73277339a990277a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:31:31 -0400 Subject: [PATCH 09/26] feat: cross-chat search service (T93) --- chat/services/cross_chat_search.py | 75 ++++++++++++++ tests/test_cross_chat_search.py | 155 +++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 chat/services/cross_chat_search.py create mode 100644 tests/test_cross_chat_search.py diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py new file mode 100644 index 0000000..cb0403f --- /dev/null +++ b/chat/services/cross_chat_search.py @@ -0,0 +1,75 @@ +"""Cross-chat search service (T93, Phase 4). + +FTS5-based search across ALL owners and ALL chats. Used by the +top-bar search UX (T100) for "where did I last see this character +mention X?" queries. NO witness filter -- this is intentionally a +power-user surface that surfaces memories across POVs. + +Mirrors the FTS5 access pattern of ``chat.state.memory.search_memories`` +but drops both the ``owner_id = ?`` and the per-witness predicates so a +single query can sweep every chat in the database. 
The composite +re-rank is also dropped: callers want raw BM25 ordering for the +"highest match strength wins" semantics expected of a global search box. +""" + +from __future__ import annotations + +from sqlite3 import Connection + + +def search_all_memories( + conn: Connection, + *, + query: str, + k: int = 20, +) -> list[dict]: + """Search FTS5 across all owners and chats. + + Returns rows with ``{memory_id, owner_id, chat_id, scene_id, + pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25 + rank ascending (lower rank = stronger match, surfaced first). + + The ``memories`` table has no ``ts`` column; we expose ``created_at`` + (the projector-side row insertion timestamp) under that key so the + UI does not have to know the storage name. + + An empty / whitespace-only ``query`` short-circuits to ``[]`` to + avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar + "no input yet" state from triggering a full-table scan. + """ + if not query or not query.strip(): + return [] + + # FTS5 MATCH against the same ``memories_fts`` virtual table that + # backs ``state.memory.search_memories``; the JOIN pulls metadata + # from the content table because the FTS index only stores + # ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns + # negative scores where lower is better. + rows = conn.execute( + "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " + " m.pov_summary, m.significance, m.created_at, " + " memories_fts.rank " + "FROM memories_fts " + "JOIN memories m ON m.id = memories_fts.rowid " + "WHERE memories_fts MATCH ? 
" + "ORDER BY memories_fts.rank ASC " + "LIMIT ?", + (query.strip(), k), + ).fetchall() + + return [ + { + "memory_id": r[0], + "owner_id": r[1], + "chat_id": r[2], + "scene_id": r[3], + "pov_summary": r[4], + "significance": r[5], + "ts": r[6], + "fts_rank": r[7], + } + for r in rows + ] + + +__all__ = ["search_all_memories"] diff --git a/tests/test_cross_chat_search.py b/tests/test_cross_chat_search.py new file mode 100644 index 0000000..4fb830e --- /dev/null +++ b/tests/test_cross_chat_search.py @@ -0,0 +1,155 @@ +"""T93 (Phase 4): cross-chat FTS5 search across all owners and chats. + +Verifies that ``chat.services.cross_chat_search.search_all_memories``: +* surfaces matches across multiple owner_ids (the per-owner restriction + used by ``state.memory.search_memories`` is intentionally absent), +* applies no witness filter (admin/power-user surface), +* orders results by FTS5 BM25 rank (lower = stronger match, surfaced + first), and +* honours the ``k`` LIMIT and the empty-query fast-path. 
+""" + +from __future__ import annotations + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +from chat.services.cross_chat_search import search_all_memories +import chat.state.memory # noqa: F401 (registers memory_written handler) + + +def _seed(db, *, memory_specs): + """Apply migrations + project a list of memory_written events.""" + apply_migrations(db) + with open_db(db) as conn: + for spec in memory_specs: + payload = { + "owner_id": spec.get("owner_id", "bot_a"), + "chat_id": spec.get("chat_id", "chat_bot_a"), + "pov_summary": spec["pov_summary"], + "witness_you": spec.get("witness_you", 1), + "witness_host": spec.get("witness_host", 1), + "witness_guest": spec.get("witness_guest", 0), + "source": "direct", + "reliability": 1.0, + "significance": spec.get("significance", 1), + "pinned": 0, + "auto_pinned": 0, + } + append_event(conn, kind="memory_written", payload=payload) + project(conn) + + +def test_search_all_memories_returns_matches_across_owners(tmp_path): + """Cross-owner: a single query must surface memories from every owner. + + The per-owner ``owner_id = ?`` predicate that ``search_memories`` uses + is intentionally absent here, so a "rabbit" memory under ``bot_a`` and + one under ``bot_b`` should both come back from a single call. + """ + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "the rabbit darted into the brambles", + }, + { + "owner_id": "bot_b", + "chat_id": "chat_bot_b", + "pov_summary": "a white rabbit watched from the hedge", + }, + # Distractor: must not appear for "rabbit". 
+ { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "the kettle whistled", + }, + ], + ) + with open_db(db) as conn: + out = search_all_memories(conn, query="rabbit") + owners = {row["owner_id"] for row in out} + assert owners == {"bot_a", "bot_b"} + assert len(out) == 2 + # Returned shape contract. + for row in out: + assert set(row.keys()) >= { + "memory_id", + "owner_id", + "chat_id", + "scene_id", + "pov_summary", + "significance", + "ts", + "fts_rank", + } + + +def test_search_all_memories_orders_by_fts_rank(tmp_path): + """Stronger BM25 match must come first (rank ASC = lower is better).""" + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + # Single occurrence -> weaker BM25 score. + { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "a rabbit appeared", + }, + # Triple occurrence in a short row -> stronger BM25 score. + { + "owner_id": "bot_b", + "chat_id": "chat_bot_b", + "pov_summary": "rabbit rabbit rabbit", + }, + ], + ) + with open_db(db) as conn: + out = search_all_memories(conn, query="rabbit", k=5) + assert len(out) == 2 + # Stronger match first; fts_rank monotonically non-decreasing + # (lower-is-better, so ASC). + assert out[0]["pov_summary"] == "rabbit rabbit rabbit" + assert out[0]["fts_rank"] <= out[1]["fts_rank"] + + +def test_search_all_memories_respects_k_limit(tmp_path): + """LIMIT ? 
must cap result count even when more matches exist.""" + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + { + "owner_id": f"bot_{i}", + "chat_id": f"chat_{i}", + "pov_summary": f"rabbit sighting number {i}", + } + for i in range(10) + ], + ) + with open_db(db) as conn: + out = search_all_memories(conn, query="rabbit", k=3) + assert len(out) == 3 + + +def test_search_all_memories_empty_query_returns_empty(tmp_path): + """Empty / whitespace-only query must short-circuit to [].""" + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + { + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "the rabbit darted into the brambles", + }, + ], + ) + with open_db(db) as conn: + assert search_all_memories(conn, query="") == [] + assert search_all_memories(conn, query=" ") == [] -- 2.52.0 From 296e8fddddb01439520e1c44aef4ee4f9f5cb11a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:35:58 -0400 Subject: [PATCH 10/26] feat: branching service (branch_from_event + switch + metadata) (T94) --- chat/services/branching.py | 107 ++++++++++++++++++++++++++++++ tests/test_branching.py | 131 +++++++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 chat/services/branching.py create mode 100644 tests/test_branching.py diff --git a/chat/services/branching.py b/chat/services/branching.py new file mode 100644 index 0000000..abf6ff7 --- /dev/null +++ b/chat/services/branching.py @@ -0,0 +1,107 @@ +"""Branching service (T94, Phase 4). + +Wraps branches state with validation + event emission. Phase 4 ships +the data model and creation/switching APIs; the read-side filter +(event readers consulting is_active) is a Phase 4.5+ follow-up — for +now branches are metadata-only and the existing event readers remain +branch-agnostic. The drawer UI (T98) drives create/switch via these +helpers. 
+""" + +from __future__ import annotations +from sqlite3 import Connection + +from chat.eventlog.log import append_and_apply +from chat.state.branches import get_branch, list_branches, active_branch # noqa: F401 + + +def branch_from_event( + conn: Connection, + *, + name: str, + origin_event_id: int, + chat_id: str | None = None, +) -> int: + """Create a new named branch forking from origin_event_id. + + Emits a branch_created event. Returns the new branch's row id. + Raises ValueError if name already exists or origin_event_id doesn't + correspond to a real event.""" + if not name or not name.strip(): + raise ValueError("branch name must be non-empty") + + if get_branch(conn, name) is not None: + raise ValueError(f"branch {name!r} already exists") + + # Validate origin_event_id is a real event id (or 0 for the bootstrap case + # which only main uses). + if origin_event_id < 0: + raise ValueError(f"origin_event_id must be >= 0, got {origin_event_id}") + if origin_event_id > 0: + row = conn.execute( + "SELECT 1 FROM event_log WHERE id = ?", (origin_event_id,) + ).fetchone() + if row is None: + raise ValueError( + f"origin_event_id {origin_event_id} does not exist in event_log" + ) + + append_and_apply( + conn, + kind="branch_created", + payload={ + "name": name, + "origin_event_id": origin_event_id, + "head_event_id": origin_event_id, # head starts at origin + "chat_id": chat_id, + }, + ) + + branch = get_branch(conn, name) + if branch is None: + # Should be unreachable if append_and_apply worked. + raise RuntimeError(f"branch {name!r} not found after creation") + return branch["id"] + + +def switch_active_branch(conn: Connection, *, name: str) -> None: + """Make the named branch active. 
Emits branch_switched.""" + if get_branch(conn, name) is None: + raise ValueError(f"branch {name!r} does not exist") + + append_and_apply( + conn, + kind="branch_switched", + payload={"name": name}, + ) + + +def list_branches_with_metadata( + conn: Connection, chat_id: str | None = None +) -> list[dict]: + """List branches with computed event_count metadata. + + event_count = head_event_id - origin_event_id + 1 (when both are set) + OR head_event_id (when origin is 0, e.g., main branch) + OR 0 (when head <= origin, which is the bootstrap state) + """ + branches = list_branches(conn, chat_id) + enriched = [] + for b in branches: + origin = b["origin_event_id"] + head = b["head_event_id"] + if head < origin: + event_count = 0 + elif origin == 0: + event_count = head + else: + event_count = head - origin + 1 + enriched.append({**b, "event_count": event_count}) + return enriched + + +__all__ = [ + "branch_from_event", + "switch_active_branch", + "list_branches_with_metadata", +] diff --git a/tests/test_branching.py b/tests/test_branching.py new file mode 100644 index 0000000..610bb2e --- /dev/null +++ b/tests/test_branching.py @@ -0,0 +1,131 @@ +"""Tests for the branching service (T94, Phase 4).""" + +from __future__ import annotations + +import pytest + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_and_apply +import chat.state.branches # noqa: F401 registers handlers +from chat.services.branching import ( + branch_from_event, + list_branches_with_metadata, + switch_active_branch, +) +from chat.state.branches import active_branch, get_branch + + +def _seed_event(conn) -> int: + """Append a benign event so we have a real event_log row to fork from. + + ``user_turn`` is a transcript-only kind with no registered projector + handler, so ``append_and_apply`` is a clean no-op on the projector + side regardless of what other handlers are imported by the suite. 
+ """ + return append_and_apply( + conn, + kind="user_turn", + payload={"chat_id": "c1", "text": "hi"}, + ) + + +def test_branch_from_event_creates_branch_via_event(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + seed_id = _seed_event(conn) + + new_id = branch_from_event( + conn, + name="experiment", + origin_event_id=seed_id, + chat_id="c1", + ) + assert isinstance(new_id, int) and new_id > 0 + + b = get_branch(conn, "experiment") + assert b is not None + assert b["id"] == new_id + assert b["origin_event_id"] == seed_id + assert b["head_event_id"] == seed_id + assert b["chat_id"] == "c1" + assert b["is_active"] is False + + # branch_created event landed in event_log + row = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'branch_created'" + ).fetchone() + assert row[0] == 1 + + +def test_branch_from_event_duplicate_name_raises(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + seed_id = _seed_event(conn) + branch_from_event(conn, name="dup", origin_event_id=seed_id) + + with pytest.raises(ValueError, match="already exists"): + branch_from_event(conn, name="dup", origin_event_id=seed_id) + + +def test_branch_from_event_invalid_origin_raises(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + with pytest.raises(ValueError, match="does not exist"): + branch_from_event(conn, name="ghost", origin_event_id=99999) + + +def test_switch_active_branch_changes_active(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + seed_id = _seed_event(conn) + branch_from_event(conn, name="experiment", origin_event_id=seed_id) + + switch_active_branch(conn, name="experiment") + active = active_branch(conn) + assert active is not None + assert active["name"] == "experiment" + + # Switch back to main. 
+ switch_active_branch(conn, name="main") + active2 = active_branch(conn) + assert active2 is not None + assert active2["name"] == "main" + + +def test_switch_active_branch_unknown_name_raises(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + with pytest.raises(ValueError, match="does not exist"): + switch_active_branch(conn, name="nope") + + +def test_list_branches_with_metadata_includes_event_count(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + # Seed enough events to cover origin=10 and head=15. + for _ in range(15): + _seed_event(conn) + + # Create the branch at origin=10, then bump its head to 15. + branch_from_event(conn, name="exp", origin_event_id=10) + append_and_apply( + conn, + kind="branch_head_updated", + payload={"name": "exp", "head_event_id": 15}, + ) + + rows = {b["name"]: b for b in list_branches_with_metadata(conn)} + + # main: bootstrap state — origin=0, head=0 — event_count == 0. + assert rows["main"]["event_count"] == 0 + # exp: origin=10, head=15 — event_count == 6 (inclusive). + assert rows["exp"]["origin_event_id"] == 10 + assert rows["exp"]["head_event_id"] == 15 + assert rows["exp"]["event_count"] == 6 -- 2.52.0 From 28e13d416fb0f20761034d93de5f2beabdb661f1 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:36:30 -0400 Subject: [PATCH 11/26] feat: delete-impact computation service (preview without mutation) (T95) --- chat/services/delete_impact.py | 147 +++++++++++++++++++ tests/test_delete_impact.py | 248 +++++++++++++++++++++++++++++++++ 2 files changed, 395 insertions(+) create mode 100644 chat/services/delete_impact.py create mode 100644 tests/test_delete_impact.py diff --git a/chat/services/delete_impact.py b/chat/services/delete_impact.py new file mode 100644 index 0000000..28ce422 --- /dev/null +++ b/chat/services/delete_impact.py @@ -0,0 +1,147 @@ +"""Delete-impact computation service (T95, Phase 4). 
+ +Walks event_log forward from a target event_id and produces an ImpactReport +describing what would be removed if rewind-to-target were invoked. Pure +computation — does NOT mutate the database. Used by T98's drawer surgical- +delete UI to render an 'are you sure?' modal before invoking the actual +rewind path (chat/services/rewind.py). +""" + +from __future__ import annotations +import json +from sqlite3 import Connection + +from pydantic import BaseModel, Field + + +class DeletedItem(BaseModel): + kind: str + description: str + target_id: int | str | None = None + + +class ImpactReport(BaseModel): + target_event_id: int + cascading: list[DeletedItem] = Field(default_factory=list) + notes: list[str] = Field(default_factory=list) + + +def _excerpt(text: str, n: int = 60) -> str: + text = (text or "").strip().replace("\n", " ") + return text if len(text) <= n else text[: n - 1] + "…" + + +def compute_delete_impact( + conn: Connection, + *, + target_event_id: int, +) -> ImpactReport: + """Compute the cascading impact of rewinding to target_event_id.""" + # Verify target exists. + target_row = conn.execute( + "SELECT id, kind, payload_json FROM event_log WHERE id = ?", + (target_event_id,), + ).fetchone() + if target_row is None: + return ImpactReport( + target_event_id=target_event_id, + cascading=[], + notes=[f"target event_id {target_event_id} not found"], + ) + + # Walk forward: every event with id >= target_event_id is in scope. + rows = conn.execute( + "SELECT id, kind, payload_json FROM event_log " + "WHERE id >= ? 
ORDER BY id ASC", + (target_event_id,), + ).fetchall() + + cascading: list[DeletedItem] = [] + notes: list[str] = [] + scene_close_present = False + regenerated_from = None + + for row_id, kind, payload_json in rows: + try: + payload = json.loads(payload_json) if payload_json else {} + except (json.JSONDecodeError, TypeError): + payload = {} + + if kind == "memory_written": + cascading.append( + DeletedItem( + kind=kind, + description=f"memory: {_excerpt(payload.get('pov_summary', ''))}", + target_id=payload.get("memory_id"), + ) + ) + elif kind == "edge_update": + src = payload.get("source_id", "?") + tgt = payload.get("target_id", "?") + cascading.append( + DeletedItem( + kind=kind, + description=f"edge update: {src} -> {tgt}", + target_id=f"{src}->{tgt}", + ) + ) + elif kind == "scene_closed": + scene_close_present = True + cascading.append( + DeletedItem( + kind=kind, + description=f"scene close at {payload.get('closed_at', '?')}", + target_id=payload.get("scene_id"), + ) + ) + elif kind in ("user_turn", "user_turn_edit", "assistant_turn"): + speaker = payload.get("speaker_id") or ("you" if kind.startswith("user") else "?") + prose = payload.get("prose") or payload.get("text") or "" + cascading.append( + DeletedItem( + kind=kind, + description=f"turn {row_id} ({speaker}: {_excerpt(prose, 50)})", + target_id=row_id, + ) + ) + if regenerated_from is None and payload.get("regenerated_from"): + regenerated_from = payload["regenerated_from"] + elif kind == "manual_edit": + target_kind = payload.get("target_kind", "?") + cascading.append( + DeletedItem( + kind=kind, + description=f"manual edit: {target_kind}", + target_id=payload.get("target_id"), + ) + ) + else: + cascading.append( + DeletedItem( + kind=kind, + description=f"{kind} event", + target_id=row_id, + ) + ) + + # Notes / warnings. 
+ notes.append(f"{len(rows)} events would be discarded total") + if scene_close_present: + notes.append( + "scene close events are in scope — closing-scene per-POV summaries " + "and group_node updates will be reverted" + ) + if regenerated_from is not None: + notes.append( + f"target turn was regenerated from event_id {regenerated_from}; " + f"the original turn remains intact" + ) + + return ImpactReport( + target_event_id=target_event_id, + cascading=cascading, + notes=notes, + ) + + +__all__ = ["DeletedItem", "ImpactReport", "compute_delete_impact"] diff --git a/tests/test_delete_impact.py b/tests/test_delete_impact.py new file mode 100644 index 0000000..4c00f07 --- /dev/null +++ b/tests/test_delete_impact.py @@ -0,0 +1,248 @@ +"""Tests for Task 95 — delete-impact computation service (Phase 4). + +`compute_delete_impact` walks event_log forward from a target event_id and +produces an :class:`ImpactReport` describing what would be removed if +rewind-to-target were invoked. It is a pure preview — no database mutation. +T98's drawer surgical-delete UI uses this to render an "are you sure?" +modal before invoking the actual rewind path. 
+""" + +from __future__ import annotations + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_event +from chat.services.delete_impact import compute_delete_impact + + +def _seed_chat(conn) -> tuple[int, int]: + """Append minimal bot + chat events; return their event ids.""" + bot_id = append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "...", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + chat_id = append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + return bot_id, chat_id + + +def test_impact_for_simple_turn_lists_memory_and_edges(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_chat(conn) + user_id = append_event( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "hey there friend", + "segments": [], + }, + ) + append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "Hi! 
Good to see you.", + "truncated": False, + "user_turn_id": user_id, + }, + ) + append_event( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "You greeted me warmly today.", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "turn", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + }, + ) + append_event( + conn, + kind="edge_update", + payload={ + "source_id": "you", + "target_id": "bot_a", + "affinity_delta": 0.1, + }, + ) + + report = compute_delete_impact(conn, target_event_id=user_id) + + assert report.target_event_id == user_id + kinds = [item.kind for item in report.cascading] + # Walk from user_turn forward — user_turn, assistant_turn, + # memory_written, edge_update should all be in scope, in order. + assert kinds == [ + "user_turn", + "assistant_turn", + "memory_written", + "edge_update", + ] + # Memory description includes the pov_summary excerpt. + mem_item = report.cascading[2] + assert "memory:" in mem_item.description + assert "greeted" in mem_item.description + # Edge description includes both endpoints. + edge_item = report.cascading[3] + assert "you" in edge_item.description + assert "bot_a" in edge_item.description + assert edge_item.target_id == "you->bot_a" + # Notes mentions total count. 
+ assert any("4 events" in n for n in report.notes) + + +def test_impact_for_scene_opening_turn_warns_about_subsequent(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_chat(conn) + early_id = append_event( + conn, + kind="user_turn", + payload={"chat_id": "chat_bot_a", "prose": "the start", "segments": []}, + ) + append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "ok", + "truncated": False, + "user_turn_id": early_id, + }, + ) + append_event( + conn, + kind="scene_closed", + payload={ + "scene_id": 1, + "closed_at": "2026-04-26T21:00:00+00:00", + "significance": 2, + }, + ) + + report = compute_delete_impact(conn, target_event_id=early_id) + + # Scene-close warning fires when one is in scope. + assert any("scene close" in n.lower() for n in report.notes) + # The scene_closed event also appears as a cascading item. + assert any(item.kind == "scene_closed" for item in report.cascading) + + +def test_impact_for_missing_event_returns_empty_with_note(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_chat(conn) + report = compute_delete_impact(conn, target_event_id=999_999) + + assert report.cascading == [] + assert any("not found" in n for n in report.notes) + + +def test_impact_does_not_mutate_database(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_chat(conn) + user_id = append_event( + conn, + kind="user_turn", + payload={"chat_id": "chat_bot_a", "prose": "hi", "segments": []}, + ) + append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "hello", + "truncated": False, + "user_turn_id": user_id, + }, + ) + + # Snapshot all event_log rows as a tuple-of-tuples. 
+ before = conn.execute( + "SELECT id, branch_id, ts, kind, payload_json, superseded_by, " + "hidden FROM event_log ORDER BY id" + ).fetchall() + + compute_delete_impact(conn, target_event_id=user_id) + + after = conn.execute( + "SELECT id, branch_id, ts, kind, payload_json, superseded_by, " + "hidden FROM event_log ORDER BY id" + ).fetchall() + + # Byte-identical: nothing inserted, deleted, or updated. + assert before == after + + +def test_impact_includes_regenerated_from_warning(tmp_path): + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_chat(conn) + original_id = append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "first try", + "truncated": False, + "user_turn_id": 0, + }, + ) + regen_id = append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "second try", + "truncated": False, + "user_turn_id": 0, + "regenerated_from": original_id, + }, + ) + + report = compute_delete_impact(conn, target_event_id=regen_id) + + # The regenerated_from note carries the original event id so the user + # knows the original turn isn't lost. + assert any("regenerated from" in n for n in report.notes) + assert any(str(original_id) in n for n in report.notes) -- 2.52.0 From b8b4aed6d974789054b9a662443a07195eb7bc3b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:42:38 -0400 Subject: [PATCH 12/26] feat: combined FTS + vector retrieval ranking via RRF (T96) --- chat/state/memory.py | 193 +++++++++++++++++++++++++++++++- tests/test_memory_search.py | 214 ++++++++++++++++++++++++++++++++++++ 2 files changed, 402 insertions(+), 5 deletions(-) diff --git a/chat/state/memory.py b/chat/state/memory.py index 5310965..42a7e95 100644 --- a/chat/state/memory.py +++ b/chat/state/memory.py @@ -102,6 +102,15 @@ _RECENCY_WEIGHT = 0.5 # a higher-is-better score by a positive constant per the spec wording. 
SIGNIFICANCE_RANK_BIAS = 0.5 +# T96 (Phase 4): reciprocal-rank-fusion constant used when ``search_memories`` +# is given a ``query_vector`` and must merge FTS + vector candidate lists. The +# value 60 is the canonical RRF constant from Cormack et al. ("Reciprocal Rank +# Fusion outperforms Condorcet and Individual Rank Learning Methods", SIGIR +# 2009): large enough to dampen the head of either ranking so that a strong +# top-1 in ranking A doesn't crowd out a moderate top-3 in ranking B, but +# small enough that the position-1/position-2 gap still matters. +RRF_CONST = 60 + def search_memories( conn: Connection, @@ -109,6 +118,8 @@ def search_memories( witness_role: str, query: str, k: int = 4, + *, + query_vector: list[float] | None = None, ) -> list[dict]: """FTS5 search over pov_summary, scoped by owner and witness role. @@ -135,6 +146,23 @@ def search_memories( * **Python-side** — a composite re-rank with ``_SIGNIFICANCE_WEIGHT`` reinforces the ordering after candidate retrieval, alongside the recency boost above. + + PHASE 4 EXTENSION (T96): when ``query_vector`` is provided, fuses FTS and + vector hits via reciprocal-rank fusion (RRF): + + fusion_score = 1/(RRF_CONST + fts_rank) + 1/(RRF_CONST + vec_rank) + + where ``fts_rank`` and ``vec_rank`` are the 0-indexed positions of the + memory in each candidate list. Each candidate gets the sum of its + reciprocal ranks across both rankings; memories appearing in only one + ranking still get a partial score (the other term is dropped). Both + candidate lists are over-fetched at ``k * 2`` so a memory dominant in + only one channel has a fair chance to surface. The Python-side + significance + recency re-rank is then applied as a final pass to + break ties in favour of more important / more recent memories. + + When ``query_vector`` is None: FTS-only behaviour unchanged — all + Phase 1-3.5 callers see the same row shape and ordering as before. 
""" if witness_role not in _VALID_WITNESS_ROLES: raise ValueError( @@ -148,7 +176,10 @@ def search_memories( select_list = ", ".join(f"m.{c}" for c in cols) # Over-fetch from FTS so the Python-side re-rank has room to reorder # results that BM25 alone would have demoted past the top-k boundary. - over_fetch = max(k * 4, 20) + # When fusing with a vector ranking, we still over-fetch (k*2 from each + # channel) so memories that are weak in FTS but strong in vector — and + # vice versa — make it into the merge pool. + over_fetch = max(k * 2, 20) if query_vector is not None else max(k * 4, 20) sql = ( f"SELECT {select_list}, memories_fts.rank AS fts_rank " "FROM memories_fts " @@ -165,11 +196,37 @@ def search_memories( ) cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch)) rows = cur.fetchall() - if not rows: - return [] - # Recency normalises against the current max id for this owner so the - # boost magnitude is bounded regardless of dataset size. + # FTS-only path: preserve pre-T96 behaviour exactly. + if query_vector is None: + if not rows: + return [] + return _composite_rerank(conn, cols, rows, owner_id, k) + + # Fused path: combine FTS candidates with vector candidates via RRF. + return _rrf_fuse_and_rerank( + conn, + cols=cols, + fts_rows=rows, + owner_id=owner_id, + witness_role=witness_role, + query_vector=query_vector, + k=k, + ) + + +def _composite_rerank( + conn: Connection, + cols: list[str], + rows: list[tuple], + owner_id: str, + k: int, +) -> list[dict]: + """Apply the significance + recency composite re-rank to FTS rows. + + Extracted from ``search_memories`` so the no-vector path stays a single + call and the fused path can re-use the same boost formulae after RRF. 
+ """ max_id_row = conn.execute( "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,) ).fetchone() @@ -187,3 +244,129 @@ def search_memories( enriched.sort(key=lambda x: x["composite_score"]) return enriched[:k] + + +def _rrf_fuse_and_rerank( + conn: Connection, + *, + cols: list[str], + fts_rows: list[tuple], + owner_id: str, + witness_role: str, + query_vector: list[float], + k: int, +) -> list[dict]: + """Merge FTS + vector candidates via reciprocal-rank fusion, then apply + the existing significance + recency boost as a final tie-breaker. + + RRF formula (Cormack et al. 2009):: + + fusion_score = sum over rankings r of 1 / (RRF_CONST + rank_r) + + where ``rank_r`` is the 0-indexed position of the memory in ranking r. + "Missing from a ranking" is handled by SKIPPING the term for that + ranking — i.e. that channel contributes 0 to the sum, which preserves + the fairness property: a memory that only appears in one ranking is + not penalised relative to itself, just relative to memories that + appeared in both. This matches the canonical RRF presentation. + + The final composite score subtracted from the *negated* fusion score + is:: + + composite = -fusion - sig_boost - recency_boost + + Sorted ascending, smaller-is-better — the same ordering convention as + the FTS-only path so the Python-side significance + recency boosts + apply as a clean tie-breaker without inverting any sign. + """ + # Lazy import to avoid a hard module-level cycle: vector_search reads + # from chat.state.embeddings, which is itself a sibling of this module. + from chat.services.vector_search import vector_search + + fts_rank_by_id: dict[int, int] = {} + fts_row_by_id: dict[int, tuple] = {} + id_idx = cols.index("id") + for rank, row in enumerate(fts_rows): + memory_id = row[id_idx] + fts_rank_by_id[memory_id] = rank + fts_row_by_id[memory_id] = row + + # Over-fetch the vector channel symmetrically so each channel gets a + # fair shot at surfacing its strongest candidates. 
+ vec_over_fetch = max(k * 2, 20) + vec_hits = vector_search( + conn, + owner_id=owner_id, + witness_role=witness_role, + query_vector=query_vector, + k=vec_over_fetch, + ) + vec_rank_by_id: dict[int, int] = { + hit["memory_id"]: rank for rank, hit in enumerate(vec_hits) + } + + # If the vector channel returned nothing (no embeddings indexed), the + # fused path collapses cleanly to the FTS-only path. No error, no + # surprise zero-hit return. + if not vec_rank_by_id and not fts_row_by_id: + return [] + if not vec_rank_by_id: + return _composite_rerank(conn, cols, fts_rows, owner_id, k) + + # For any vector-only hits we don't have a full memory row for yet, + # fetch them in a single round-trip. The FTS row carries an ``fts_rank`` + # column at the end; vector-only rows get ``None`` there. + missing_ids = [mid for mid in vec_rank_by_id if mid not in fts_row_by_id] + select_list = ", ".join(cols) + if missing_ids: + placeholders = ",".join("?" * len(missing_ids)) + cur = conn.execute( + f"SELECT {select_list} FROM memories WHERE id IN ({placeholders})", + missing_ids, + ) + for row in cur.fetchall(): + # Pad with a None for the trailing ``fts_rank`` slot so the row + # shape matches FTS rows downstream. + fts_row_by_id[row[id_idx]] = tuple(row) + (None,) + + # Compute fusion score per candidate. Missing-from-ranking terms are + # simply omitted from the sum. + all_ids = set(fts_rank_by_id) | set(vec_rank_by_id) + fusion_by_id: dict[int, float] = {} + for mid in all_ids: + score = 0.0 + if mid in fts_rank_by_id: + score += 1.0 / (RRF_CONST + fts_rank_by_id[mid]) + if mid in vec_rank_by_id: + score += 1.0 / (RRF_CONST + vec_rank_by_id[mid]) + fusion_by_id[mid] = score + + # Final composite re-rank: significance + recency boosts on top of the + # negated fusion score so the sort direction matches the FTS-only path. 
+ max_id_row = conn.execute( + "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,) + ).fetchone() + max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1 + + result_cols = cols + ["fts_rank"] + enriched: list[dict] = [] + for mid in all_ids: + row = fts_row_by_id.get(mid) + if row is None: + # Defensive: a vector hit with no memory row would be a logic + # bug (vector_search joins memories), so just skip it rather + # than crash the whole search. + continue + d = dict(zip(result_cols, row)) + sig_boost = _SIGNIFICANCE_WEIGHT * (d.get("significance") or 0) + recency_boost = _RECENCY_WEIGHT * ((d.get("id") or 0) / max_id) + fusion = fusion_by_id[mid] + # Sort ascending, smaller-is-better → negate fusion so a larger + # fusion score yields a smaller composite. Significance and recency + # boosts then act as tie-breakers exactly like the FTS-only path. + d["fusion_score"] = fusion + d["composite_score"] = -fusion - sig_boost - recency_boost + enriched.append(d) + + enriched.sort(key=lambda x: x["composite_score"]) + return enriched[:k] diff --git a/tests/test_memory_search.py b/tests/test_memory_search.py index 76f0ee1..c62c1bf 100644 --- a/tests/test_memory_search.py +++ b/tests/test_memory_search.py @@ -16,6 +16,7 @@ from chat.eventlog.log import append_event from chat.eventlog.projector import project from chat.state.memory import search_memories import chat.state.memory # noqa: F401 (registers memory_written handler) +import chat.state.embeddings # noqa: F401 (registers embedding_indexed handler) def _seed(db, *, memory_specs): @@ -159,3 +160,216 @@ def test_significance_bias_is_constant_module_level(): # Must be non-negative -- a negative bias would invert the desired # "higher significance ranks higher" semantics. assert SIGNIFICANCE_RANK_BIAS >= 0 + + +# --------------------------------------------------------------------------- +# T96 (Phase 4): combined FTS + vector retrieval ranking via reciprocal-rank +# fusion. 
The fused path activates only when ``query_vector`` is provided — +# the no-vector path (above) is unchanged. +# --------------------------------------------------------------------------- + + +def _one_hot(dim: int, idx: int) -> list[float]: + v = [0.0] * dim + v[idx] = 1.0 + return v + + +def _seed_memories_with_optional_embeddings(db, *, memory_specs): + """Like ``_seed`` but also projects ``embedding_indexed`` events for any + spec carrying a ``vector`` key. + + Memory rows are assigned ids in the order their ``memory_written`` events + were appended (the ``memories.id`` column is an autoincrementing primary + key), so we predict ``memory_id = i + 1`` per spec and append both kinds + of events back-to-back BEFORE projecting. Projecting only once keeps the + INSERT-based ``memory_written`` handler from duplicating rows. + """ + apply_migrations(db) + with open_db(db) as conn: + # First pass: append every memory_written event in order. The DB + # assigns autoincrementing ids 1..N matching the order of these + # events, so we can pair vectors to memories by index. + for spec in memory_specs: + payload = { + "owner_id": spec.get("owner_id", "bot_a"), + "chat_id": spec.get("chat_id", "chat_bot_a"), + "pov_summary": spec["pov_summary"], + "witness_you": spec.get("witness_you", 1), + "witness_host": spec.get("witness_host", 1), + "witness_guest": spec.get("witness_guest", 0), + "source": "direct", + "reliability": 1.0, + "significance": spec.get("significance", 1), + "pinned": 0, + "auto_pinned": 0, + } + append_event(conn, kind="memory_written", payload=payload) + # Second pass: append embedding_indexed events for any spec that + # supplied a vector, using the predicted memory id. 
+ for i, spec in enumerate(memory_specs, start=1): + if "vector" not in spec: + continue + vec = spec["vector"] + append_event( + conn, + kind="embedding_indexed", + payload={ + "memory_id": i, + "vector": list(vec), + "model": "test-model", + "dim": len(vec), + }, + ) + # Single projection — avoids the memory_written handler INSERTing + # the same row twice on a re-projection. + project(conn) + + +def test_search_memories_without_query_vector_uses_fts_only(tmp_path): + """Regression: omitting ``query_vector`` keeps the existing FTS-only path. + + Identical seed to ``test_search_higher_significance_ranks_above_lower`` + but pinned to the no-vector code path explicitly (no kwarg passed). + """ + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + {"pov_summary": "small promise"}, + {"pov_summary": "huge promise"}, + {"pov_summary": "tiny promise", "significance": 3}, + ], + ) + with open_db(db) as conn: + out = search_memories(conn, "bot_a", "host", "promise", k=3) + assert len(out) == 3 + # The composite re-rank surfaces the high-significance row first. + assert out[0]["pov_summary"] == "tiny promise" + # Sanity: the row shape still carries ``fts_rank`` + ``composite_score`` + # like the FTS-only path always has. + assert "fts_rank" in out[0] + assert "composite_score" in out[0] + + +def test_search_memories_with_query_vector_includes_vector_hits(tmp_path): + """RRF fuses FTS hits with vector hits — both kinds surface in the result. + + Memory 1 only matches FTS (keyword "rabbit", embedding far from query). + Memory 2 only matches the vector (embedding identical to query, no + keyword overlap). Memories 3-5 are unrelated. The fused top-K must + contain BOTH memory 1 and memory 2. + """ + db = tmp_path / "t.db" + dim = 8 + # Query vector = one-hot at index 0. Memory 2 mirrors it exactly. 
The + # FTS-only memory (memory 1) has NO embedding so it cannot leak into + # the vector ranking; the filler memories (3-5) likewise have no + # embeddings, so the vector ranking returns memory 2 alone. + query_vec = _one_hot(dim, 0) + _seed_memories_with_optional_embeddings( + db, + memory_specs=[ + # Memory 1: FTS-only match. No embedding indexed. + {"pov_summary": "rabbit hopped over the fence"}, + # Memory 2: vector-only match. No keyword overlap with "rabbit". + { + "pov_summary": "completely unrelated narrative line", + "vector": _one_hot(dim, 0), + }, + # Memories 3-5: filler, irrelevant to both channels. + {"pov_summary": "lighthouse keeper polished the lens"}, + {"pov_summary": "they discussed cartography for hours"}, + {"pov_summary": "she taught him semaphore signals"}, + ], + ) + with open_db(db) as conn: + out = search_memories( + conn, + "bot_a", + "host", + "rabbit", + k=4, + query_vector=query_vec, + ) + summaries = [r["pov_summary"] for r in out] + # FTS-only candidate (memory 1) made it through. + assert "rabbit hopped over the fence" in summaries + # Vector-only candidate (memory 2) also made it through despite + # having no keyword overlap with the query string. + assert "completely unrelated narrative line" in summaries + + +def test_search_memories_fusion_significance_bias_still_applies(tmp_path): + """With two RRF-tied candidates, the higher-significance one ranks first. + + Two memories share the keyword "promise" AND share an identical + embedding to the query — so their FTS rank and vector rank are both + ties. RRF gives them the same fusion score. The Python-side + significance + recency boost must break the tie in favour of the + higher-significance memory. 
+ """ + db = tmp_path / "t.db" + dim = 4 + shared_vec = _one_hot(dim, 0) + _seed_memories_with_optional_embeddings( + db, + memory_specs=[ + { + "pov_summary": "she made a promise", + "significance": 0, + "vector": list(shared_vec), + }, + { + "pov_summary": "she made a promise", + "significance": 3, + "vector": list(shared_vec), + }, + ], + ) + with open_db(db) as conn: + out = search_memories( + conn, + "bot_a", + "host", + "promise", + k=2, + query_vector=list(shared_vec), + ) + assert len(out) == 2 + # Higher significance breaks the RRF tie. + assert out[0]["significance"] == 3 + assert out[1]["significance"] == 0 + + +def test_search_memories_fusion_handles_empty_vector_results(tmp_path): + """Vector path returning [] (no embeddings indexed) must not break FTS. + + No ``embedding_indexed`` events are projected, so ``vector_search`` + returns an empty list. The function should still return the FTS hits + as if ``query_vector`` had not been supplied. + """ + db = tmp_path / "t.db" + _seed( + db, + memory_specs=[ + {"pov_summary": "the vault held an old promise"}, + {"pov_summary": "another promise was kept that night"}, + ], + ) + with open_db(db) as conn: + out = search_memories( + conn, + "bot_a", + "host", + "promise", + k=4, + query_vector=[0.0] * 384, # No embeddings exist for this owner. + ) + # Both FTS hits still come back — no error from the empty vector path. 
+ assert len(out) == 2 + summaries = {r["pov_summary"] for r in out} + assert summaries == { + "the vault held an old promise", + "another promise was kept that night", + } -- 2.52.0 From 6674f9475cfe6832292431ede5b9f2e57cbdea5d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:51:36 -0400 Subject: [PATCH 13/26] feat: embedding worker drains queue and emits embedding_indexed events (T97.1) --- chat/services/embedding_worker.py | 137 ++++++++++++++++++++++ tests/test_embedding_worker.py | 185 ++++++++++++++++++++++++++++++ 2 files changed, 322 insertions(+) create mode 100644 chat/services/embedding_worker.py create mode 100644 tests/test_embedding_worker.py diff --git a/chat/services/embedding_worker.py b/chat/services/embedding_worker.py new file mode 100644 index 0000000..80f87d8 --- /dev/null +++ b/chat/services/embedding_worker.py @@ -0,0 +1,137 @@ +"""Embedding worker (T97, Phase 4). + +Drains a queue of embedding jobs. Each job carries a memory id and the +narrative text to embed; the worker calls +:func:`chat.services.embeddings.generate_embedding` and emits an +``embedding_indexed`` event so the projector lands the vector in the +``embeddings`` table. + +Mirrors the :class:`chat.services.background.BackgroundWorker` pattern: +single asyncio task, sentinel-based shutdown, exceptions are caught and +logged so a flaky embedding call doesn't take down the worker. Each job +opens its own SQLite connection via ``conn_factory`` — the request path +and the worker do not share connections. + +Featherless concurrency (the 2-conn cap) is respected by virtue of the +single-task design: jobs run strictly serially. Phase 4's pseudo-embedding +path is local and synchronous so this is largely moot, but the pattern +is in place for the Phase 4.5+ real-embedding swap. 
+""" + +from __future__ import annotations + +import asyncio +import logging +from dataclasses import dataclass +from sqlite3 import Connection +from typing import Callable + +from chat.eventlog.log import append_and_apply +from chat.services.embeddings import ( + DEFAULT_EMBEDDING_DIM, + DEFAULT_EMBEDDING_MODEL, + FALLBACK_EMBEDDING_MODEL, + generate_embedding, +) + + +log = logging.getLogger(__name__) + + +@dataclass +class EmbeddingJob: + """One unit of work for the embedding worker. + + ``memory_id`` is the row to attach the vector to; ``text`` is the + narrative text to embed (typically ``memories.pov_summary``). + """ + + memory_id: int + text: str + + +class EmbeddingWorker: + """asyncio.Queue-backed single-worker task for embedding generation. + + Started on app startup; ``stop()`` enqueues a sentinel and awaits + the task so any in-flight job has a chance to finish. Pending jobs + after the sentinel are dropped on shutdown. + """ + + def __init__( + self, + *, + conn_factory: Callable[[], Connection], + client, # LLMClient | None — unused on the pseudo path. 
+ model: str = DEFAULT_EMBEDDING_MODEL, + dim: int = DEFAULT_EMBEDDING_DIM, + enabled: bool = True, + ) -> None: + self._queue: asyncio.Queue[EmbeddingJob | None] = asyncio.Queue() + self._conn_factory = conn_factory + self._client = client + self._model = model + self._dim = dim + self._task: asyncio.Task | None = None + self.enabled = enabled + + def enqueue(self, job: EmbeddingJob) -> None: + if not self.enabled: + return + self._queue.put_nowait(job) + + async def start(self) -> None: + if self._task is None: + self._task = asyncio.create_task(self._run()) + + async def stop(self) -> None: + if self._task is None: + return + self._queue.put_nowait(None) # sentinel + await self._task + self._task = None + + async def _run(self) -> None: + while True: + job = await self._queue.get() + if job is None: + return + try: + await self._process(job) + except Exception as exc: # noqa: BLE001 — worker must not die + log.warning( + "embedding worker failed for memory_id=%s: %s", + job.memory_id, + exc, + exc_info=True, + ) + + async def _process(self, job: EmbeddingJob) -> None: + result = await generate_embedding( + self._client, + text=job.text, + model=self._model, + dim=self._dim, + ) + if result.model == FALLBACK_EMBEDDING_MODEL: + # Don't index a fallback (zero) vector — the backfill script + # can retry later once a real embedding is available. + log.debug( + "embedding worker skipping fallback result for memory_id=%s", + job.memory_id, + ) + return + with self._conn_factory() as conn: + append_and_apply( + conn, + kind="embedding_indexed", + payload={ + "memory_id": job.memory_id, + "model": result.model, + "dim": result.dim, + "vector": result.vector, + }, + ) + + +__all__ = ["EmbeddingJob", "EmbeddingWorker"] diff --git a/tests/test_embedding_worker.py b/tests/test_embedding_worker.py new file mode 100644 index 0000000..f7d9416 --- /dev/null +++ b/tests/test_embedding_worker.py @@ -0,0 +1,185 @@ +"""Embedding worker (T97, Phase 4). 
+ +The worker drains a queue of EmbeddingJobs and emits ``embedding_indexed`` +events. Mirrors test_significance.py's BackgroundWorker tests in shape: +seed a memory, enqueue jobs, call ``stop()`` to drain via sentinel, then +assert on the projected ``embeddings`` table and the underlying event_log. +""" + +from __future__ import annotations + +from pathlib import Path + +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +from chat.services.embedding_worker import EmbeddingJob, EmbeddingWorker +from chat.services.embeddings import ( + DEFAULT_EMBEDDING_MODEL, + EmbeddingResult, + FALLBACK_EMBEDDING_MODEL, +) + +# Trigger handler registration for projection. +import chat.state.embeddings # noqa: F401 +import chat.state.entities # noqa: F401 +import chat.state.memory # noqa: F401 +import chat.state.world # noqa: F401 + + +def _seed_memories(db_path: Path, count: int) -> list[int]: + """Seed ``count`` memory rows for ``bot_a`` and return their ids.""" + with open_db(db_path) as conn: + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "...", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + for i in range(count): + append_event( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": f"memory text {i}", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "direct", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + }, + ) + project(conn) + return [ + r[0] + for r in conn.execute( + "SELECT id FROM 
memories WHERE owner_id = 'bot_a' ORDER BY id" + ).fetchall() + ] + + +async def test_worker_drains_jobs_and_emits_indexed_events(tmp_path): + """Three jobs in -> three ``embedding_indexed`` events out, all + projected into the ``embeddings`` table.""" + db = tmp_path / "t.db" + apply_migrations(db) + memory_ids = _seed_memories(db, count=3) + + worker = EmbeddingWorker( + conn_factory=lambda: open_db(db), + client=None, # pseudo path — no client needed + ) + await worker.start() + for mid in memory_ids: + worker.enqueue(EmbeddingJob(memory_id=mid, text=f"text-{mid}")) + await worker.stop() + + with open_db(db) as conn: + # Three embedding_indexed events landed. + cur = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'embedding_indexed'" + ) + assert cur.fetchone()[0] == 3 + # Three rows in the embeddings table — one per memory. + cur = conn.execute( + "SELECT memory_id, model, dim FROM embeddings ORDER BY memory_id" + ) + rows = cur.fetchall() + assert len(rows) == 3 + for (mid, model, dim), expected_mid in zip(rows, memory_ids): + assert mid == expected_mid + assert model == DEFAULT_EMBEDDING_MODEL + assert dim > 0 + + +async def test_worker_skips_fallback_results(tmp_path, monkeypatch): + """A fallback EmbeddingResult must NOT produce an embedding_indexed + event — backfill can retry later when a real embedding is available.""" + db = tmp_path / "t.db" + apply_migrations(db) + memory_ids = _seed_memories(db, count=1) + + async def _fake_generate(client, *, text, model, dim, timeout_s=30.0): + return EmbeddingResult( + vector=[0.0] * dim, model=FALLBACK_EMBEDDING_MODEL, dim=dim + ) + + # Patch the symbol the worker resolved at import time. 
+ import chat.services.embedding_worker as worker_mod + + monkeypatch.setattr(worker_mod, "generate_embedding", _fake_generate) + + worker = EmbeddingWorker( + conn_factory=lambda: open_db(db), + client=None, + ) + await worker.start() + worker.enqueue(EmbeddingJob(memory_id=memory_ids[0], text="anything")) + await worker.stop() + + with open_db(db) as conn: + cur = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'embedding_indexed'" + ) + assert cur.fetchone()[0] == 0 + cur = conn.execute("SELECT COUNT(*) FROM embeddings") + assert cur.fetchone()[0] == 0 + + +async def test_worker_handles_concurrent_jobs_serially(tmp_path): + """Five jobs queued back-to-back must process in FIFO order — the + single-task design respects the Featherless 2-conn cap (and keeps + event_log ordering deterministic).""" + db = tmp_path / "t.db" + apply_migrations(db) + memory_ids = _seed_memories(db, count=5) + + worker = EmbeddingWorker( + conn_factory=lambda: open_db(db), + client=None, + ) + await worker.start() + # Enqueue all five before yielding to the loop — exercises the queue + # rather than a one-at-a-time drain. + for mid in memory_ids: + worker.enqueue(EmbeddingJob(memory_id=mid, text=f"text-{mid}")) + await worker.stop() + + with open_db(db) as conn: + # Events landed in enqueue order (FIFO). + cur = conn.execute( + "SELECT json_extract(payload_json, '$.memory_id') " + "FROM event_log WHERE kind = 'embedding_indexed' " + "ORDER BY id" + ) + seen = [r[0] for r in cur.fetchall()] + assert seen == memory_ids + + # All five embeddings projected. 
+ cur = conn.execute("SELECT COUNT(*) FROM embeddings") + assert cur.fetchone()[0] == 5 -- 2.52.0 From 64a07aa87fa19cc388bf65824995bb746cac1a4d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:51:40 -0400 Subject: [PATCH 14/26] feat: memory_write enqueues embedding job after each memory_written (T97.2) --- chat/services/memory_write.py | 43 +++++++++++++++++++++++++++++++- tests/test_memory_write.py | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/chat/services/memory_write.py b/chat/services/memory_write.py index d60c3d9..5e89eb3 100644 --- a/chat/services/memory_write.py +++ b/chat/services/memory_write.py @@ -13,6 +13,14 @@ Phase 1 simplifications (per plan §11.1, T27 will refine): pass overwrites via a follow-up event. - Witness flags are hard-coded ``[you=1, host=1, guest=0]``. Phase 2 will derive them from ``chat.guest_bot_id`` once a guest can be present. + +T97 (Phase 4): each successful memory write also enqueues an +:class:`~chat.services.embedding_worker.EmbeddingJob` on the +lifespan-managed embedding worker, so the just-written memory gets a +vector indexed out-of-band. The hook is opt-in via the ``app`` kwarg — +callers without a FastAPI app handle (e.g. one-off scripts, isolated +unit tests) simply don't enqueue, and the backfill script can pick up +those rows later. """ from __future__ import annotations @@ -20,6 +28,7 @@ from __future__ import annotations from sqlite3 import Connection from chat.eventlog.log import append_and_apply +from chat.services.embedding_worker import EmbeddingJob def _write_one_memory( @@ -35,9 +44,16 @@ def _write_one_memory( chat_clock_at: str | None, source: str, significance: int, + app=None, ) -> tuple[int, int | None]: """Append a single ``memory_written`` event for ``owner_id`` and return - ``(event_id, memory_id)`` for the projected row.""" + ``(event_id, memory_id)`` for the projected row. 
+ + When ``app`` is provided and ``app.state.embedding_worker`` exists, + enqueue an :class:`EmbeddingJob` for the freshly-projected memory id + (T97). Skipped silently if the worker is absent or the projected row + can't be located — the backfill script handles missing-vector rows. + """ payload: dict = { "owner_id": owner_id, "chat_id": chat_id, @@ -64,6 +80,23 @@ def _write_one_memory( (owner_id, chat_id), ).fetchone() memory_id = row[0] if row else None + + # T97: enqueue an embedding job for the just-written memory. The + # worker drains the queue out-of-band and emits an + # ``embedding_indexed`` event when the vector is ready. ``getattr`` + # keeps this a no-op for callers without a wired-up app (scripts, + # tests) — the backfill script handles those rows. + if memory_id is not None and narrative_text and narrative_text.strip(): + worker = ( + getattr(app.state, "embedding_worker", None) + if app is not None + else None + ) + if worker is not None: + worker.enqueue( + EmbeddingJob(memory_id=memory_id, text=narrative_text) + ) + return event_id, memory_id @@ -79,6 +112,7 @@ def record_turn_memory_for_present( source: str = "direct", significance: int = 1, you_present: bool = True, + app=None, ) -> dict[str, tuple[int, int | None]]: """Single entry-point for per-turn memory writes (T84). @@ -97,6 +131,9 @@ def record_turn_memory_for_present( with ``you_present=False`` is a programming error and raises :class:`ValueError`. + When ``app`` is provided, each per-witness write also enqueues an + :class:`EmbeddingJob` on ``app.state.embedding_worker`` (T97). + Returns a mapping ``{bot_id: (event_id, memory_id)}`` so callers can look up the freshly-projected memory id per owner without re-querying the database. 
@@ -121,6 +158,7 @@ def record_turn_memory_for_present( chat_clock_at=chat_clock_at, source=source, significance=significance, + app=app, ) if guest_bot_id is not None: result[guest_bot_id] = _write_one_memory( @@ -135,6 +173,7 @@ def record_turn_memory_for_present( chat_clock_at=chat_clock_at, source=source, significance=significance, + app=app, ) return result @@ -150,6 +189,7 @@ def record_meanwhile_memory( chat_clock_at: str | None = None, source: str = "direct", significance: int = 1, + app=None, ) -> dict[str, tuple[int, int | None]]: """Backward-compat thin wrapper for meanwhile memory writes (T64, T84). @@ -169,4 +209,5 @@ def record_meanwhile_memory( source=source, significance=significance, you_present=False, + app=app, ) diff --git a/tests/test_memory_write.py b/tests/test_memory_write.py index 8c5253a..3c135a5 100644 --- a/tests/test_memory_write.py +++ b/tests/test_memory_write.py @@ -540,3 +540,49 @@ def test_record_turn_memory_you_present_false_requires_guest(tmp_path): narrative_text="invalid", you_present=False, ) + + +# --------------------------------------------------------------------------- +# T97: embedding-worker enqueue hook. +# --------------------------------------------------------------------------- + + +def test_record_turn_memory_enqueues_embedding_job(tmp_path): + """When ``app.state.embedding_worker`` is wired, every per-witness + write enqueues an :class:`EmbeddingJob` carrying the freshly-projected + memory id and the narrative text. 
Two-bot turn -> two jobs.""" + from types import SimpleNamespace + + from chat.services.embedding_worker import EmbeddingJob + + db = tmp_path / "t.db" + apply_migrations(db) + _seed_two_bots(db) + + captured: list[EmbeddingJob] = [] + + class _StubWorker: + def enqueue(self, job: EmbeddingJob) -> None: + captured.append(job) + + fake_app = SimpleNamespace( + state=SimpleNamespace(embedding_worker=_StubWorker()) + ) + + with open_db(db) as conn: + result = record_turn_memory_for_present( + conn, + chat_id="chat_ab", + host_bot_id="bot_a", + guest_bot_id="bot_b", + narrative_text="Both bots witness this beat.", + app=fake_app, + ) + + # One job per witness — host first, then guest (matches result dict + # insertion order in record_turn_memory_for_present). + assert len(captured) == 2 + expected_ids = {result["bot_a"][1], result["bot_b"][1]} + assert {job.memory_id for job in captured} == expected_ids + for job in captured: + assert job.text == "Both bots witness this beat." -- 2.52.0 From 9c63d6b24c315c5b0a7126e6668c3b2a1c9567cf Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:51:44 -0400 Subject: [PATCH 15/26] feat: app lifespan starts/stops EmbeddingWorker (T97.3) --- chat/app.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/chat/app.py b/chat/app.py index c9daf90..9e2c74b 100644 --- a/chat/app.py +++ b/chat/app.py @@ -16,6 +16,7 @@ from chat.db.migrate import apply_migrations from chat.eventlog.log import read_events from chat.eventlog.projector import apply_event from chat.services.background import BackgroundWorker +from chat.services.embedding_worker import EmbeddingWorker from chat.services.snapshot import latest_snapshot_path, restore_from_snapshot # Trigger handler registration: @@ -85,9 +86,23 @@ async def lifespan(app: FastAPI): await worker.start() app.state.background_worker = worker + # T97: separate worker for the async embedding pass. 
Each + # ``memory_written`` enqueues an EmbeddingJob; the worker drains the + # queue, calls ``generate_embedding``, and emits ``embedding_indexed``. + # Phase 4's pseudo-embedding path is local so the worker doesn't need + # an LLM client; we still pass one so the Phase 4.5 swap to a real + # model is a one-line change. + embedding_worker = EmbeddingWorker( + conn_factory=lambda: open_db(settings.db_path), + client=_factory(), + ) + await embedding_worker.start() + app.state.embedding_worker = embedding_worker + try: yield finally: + await embedding_worker.stop() await worker.stop() -- 2.52.0 From d85ed8aaa6e90caa467d4edce942264fe4d135d7 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 02:51:48 -0400 Subject: [PATCH 16/26] feat: backfill_embeddings script for existing memories (T97.4) --- scripts/backfill_embeddings.py | 97 ++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 scripts/backfill_embeddings.py diff --git a/scripts/backfill_embeddings.py b/scripts/backfill_embeddings.py new file mode 100644 index 0000000..f5c15bb --- /dev/null +++ b/scripts/backfill_embeddings.py @@ -0,0 +1,97 @@ +"""Backfill embeddings for memories that lack them (T97, Phase 4). + +Walks all memories where no row exists in the ``embeddings`` table. For +each, calls :func:`chat.services.embeddings.generate_embedding` and emits +an ``embedding_indexed`` event so the projector lands the vector. + +Phase 4 ships the deterministic local pseudo-embedding so this script +runs synchronously without a network round-trip — the LLMClient argument +is not needed on the pseudo path. Phase 4.5+ will need a real client. 
+ +Run from the repo root: + .venv/bin/python scripts/backfill_embeddings.py [--limit N] [--dry-run] +""" + +from __future__ import annotations + +import argparse +import asyncio + +from chat.config import load_settings +from chat.db.connection import open_db +from chat.db.migrate import apply_migrations +from chat.eventlog.log import append_and_apply +from chat.services.embeddings import ( + FALLBACK_EMBEDDING_MODEL, + generate_embedding, +) + +# Trigger projector handler registration so ``append_and_apply`` lands +# the embedding rows correctly. +import chat.state.embeddings # noqa: F401 +import chat.state.entities # noqa: F401 +import chat.state.memory # noqa: F401 +import chat.state.world # noqa: F401 + + +async def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Cap the number of memories backfilled in this run.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print the count of memories needing embeddings, then exit.", + ) + args = parser.parse_args() + + settings = load_settings() + settings.db_path.parent.mkdir(parents=True, exist_ok=True) + apply_migrations(settings.db_path) + + with open_db(settings.db_path) as conn: + sql = ( + "SELECT m.id, m.pov_summary FROM memories m " + "LEFT JOIN embeddings e ON e.memory_id = m.id " + "WHERE e.memory_id IS NULL " + "ORDER BY m.id" + ) + if args.limit is not None: + sql += f" LIMIT {int(args.limit)}" + rows = conn.execute(sql).fetchall() + print(f"Found {len(rows)} memories needing embeddings.") + if args.dry_run: + return + + indexed = 0 + skipped = 0 + for memory_id, text in rows: + result = await generate_embedding( + client=None, # pseudo path: no client needed + text=text or "", + ) + if result.model == FALLBACK_EMBEDDING_MODEL: + print(f" Skipping memory_id={memory_id} (empty text)") + skipped += 1 + continue + append_and_apply( + conn, + kind="embedding_indexed", + payload={ + "memory_id": 
memory_id, + "model": result.model, + "dim": result.dim, + "vector": result.vector, + }, + ) + indexed += 1 + print(f" Indexed memory_id={memory_id}") + print(f"Done. Indexed {indexed}, skipped {skipped}.") + + +if __name__ == "__main__": + asyncio.run(main()) -- 2.52.0 From 177e39d59cfcd5dc0b7787b9da39e6973b047ef0 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:08:36 -0400 Subject: [PATCH 17/26] feat: wire embedding worker call sites in turns/meanwhile/skip/regenerate (T97.5) --- chat/services/regenerate.py | 3 + chat/web/drawer.py | 2 + chat/web/meanwhile.py | 2 + chat/web/skip.py | 3 + chat/web/turns.py | 5 + tests/test_phase4_integration.py | 180 +++++++++++++++++++++++++++++++ 6 files changed, 195 insertions(+) create mode 100644 tests/test_phase4_integration.py diff --git a/chat/services/regenerate.py b/chat/services/regenerate.py index 0678a76..6442bb2 100644 --- a/chat/services/regenerate.py +++ b/chat/services/regenerate.py @@ -103,6 +103,7 @@ async def regenerate_assistant_turn( chat_id: str, original_assistant_event_id: int, edited_user_prose: str | None = None, + app=None, ) -> str: """Regenerate the assistant turn linked to ``original_assistant_event_id``. 
@@ -414,6 +415,7 @@ async def regenerate_assistant_turn( narrative_text=new_text, scene_id=scene["id"] if scene else None, chat_clock_at=chat.get("time"), + app=app, ) last_at = chat.get("time") @@ -648,6 +650,7 @@ async def regenerate_assistant_turn( narrative_text=interject_text, scene_id=scene["id"] if scene else None, chat_clock_at=chat.get("time"), + app=app, ) # Re-run the multi-pair state-update with the post-interjection diff --git a/chat/web/drawer.py b/chat/web/drawer.py index bcfdc0d..97f03cf 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -993,6 +993,7 @@ async def skip_elision( chat_id=chat_id, new_time=new_time, landing_state_hint=landing_state_hint, + app=request.app, ) except ChatNotFoundError as exc: # Missing chat row: typed exception (T81) replaces the prior @@ -1036,6 +1037,7 @@ async def skip_jump( new_time=new_time, notable_prose=notable_prose, reset_activity=reset_flag, + app=request.app, ) except ChatNotFoundError as exc: # Missing chat row: typed exception (T81) replaces the prior diff --git a/chat/web/meanwhile.py b/chat/web/meanwhile.py index 5c46b3e..52a91bc 100644 --- a/chat/web/meanwhile.py +++ b/chat/web/meanwhile.py @@ -131,6 +131,7 @@ async def process_meanwhile_turn( *, chat_id: str, prose: str, + app=None, ) -> dict: """Run one meanwhile turn end-to-end. @@ -314,6 +315,7 @@ async def process_meanwhile_turn( narrative_text=text, scene_id=scene_id, chat_clock_at=chat.get("time"), + app=app, ) # 9. Post-turn state-update — exactly 2 directed pairs over the diff --git a/chat/web/skip.py b/chat/web/skip.py index b6aa179..fd241df 100644 --- a/chat/web/skip.py +++ b/chat/web/skip.py @@ -91,6 +91,7 @@ async def process_elision_skip( chat_id: str, new_time: str, landing_state_hint: str = "", + app=None, ) -> dict: """Run an elision skip end-to-end. @@ -175,6 +176,7 @@ async def process_jump_skip( new_time: str, notable_prose: str = "", reset_activity: bool = False, + app=None, ) -> dict: """Run a jump skip end-to-end. 
@@ -254,6 +256,7 @@ async def process_jump_skip( chat_clock_at=new_time, source="synthesized", significance=mem.significance, + app=app, ) narration = await narrate_skip( diff --git a/chat/web/turns.py b/chat/web/turns.py index 94f46d4..97ef4a6 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -248,6 +248,7 @@ async def post_turn( settings, chat_id=chat_id, prose=prose, + app=request.app, ) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) @@ -352,6 +353,7 @@ async def post_turn( new_time=new_time, landing_state_hint=getattr(parsed, "landing_state_hint", "") or "", + app=request.app, ) except ChatNotFoundError as exc: # Defensive: chat existence is checked above, so this only @@ -512,6 +514,7 @@ async def post_turn( narrative_text=primary_text, scene_id=scene["id"] if scene else None, chat_clock_at=chat.get("time"), + app=request.app, ) # 7b. Post-turn state-update pass (Requirements §3.4 / T40). All @@ -746,6 +749,7 @@ async def post_turn( narrative_text=interjection_text, scene_id=scene["id"] if scene else None, chat_clock_at=chat.get("time"), + app=request.app, ) # T74.2: enqueue a significance pass for the interjection @@ -1092,6 +1096,7 @@ async def regenerate_turn( chat_id=chat_id, original_assistant_event_id=event_id, edited_user_prose=edited_prose, + app=request.app, ) except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) diff --git a/tests/test_phase4_integration.py b/tests/test_phase4_integration.py new file mode 100644 index 0000000..ee30f07 --- /dev/null +++ b/tests/test_phase4_integration.py @@ -0,0 +1,180 @@ +"""Phase 4 cross-feature integration tests (T97 follow-up). + +Wave 8 / T101 will populate this file with the full Phase 4 retrieval + +embedding integration suite. 
For now this houses a single test pinning +the T97.5 wiring: the production turn route plumbs ``app=request.app`` +all the way through ``record_turn_memory_for_present`` so the embedding +worker actually receives jobs in production. Without this fix-up the +plumbing added in T97 was dormant — every per-witness write took the +no-app branch and silently dropped the embed enqueue. + +The test monkeypatches ``app.state.embedding_worker.enqueue`` to record +jobs (rather than draining the worker mid-test) so the assertion is +deterministic and free of asyncio-timing flakiness inside FastAPI's +TestClient. The bug we're guarding against is "did the call site pass +``app`` at all" — the worker's drain path is exercised in +:mod:`tests.test_embedding_worker`, so duplicating that here would add +no coverage. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +from chat.llm.mock import MockLLMClient + + +def _zero_state() -> str: + return json.dumps( + {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} + ) + + +def _override_llm(canned: list[str]) -> MockLLMClient: + from chat.web.kickoff import get_llm_client + + mock = MockLLMClient(canned=list(canned)) + app.dependency_overrides[get_llm_client] = lambda: mock + return mock + + +@pytest.fixture +def app_state_setup(tmp_path, monkeypatch): + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + db = tmp_path / "test.db" + monkeypatch.setenv("CHAT_DB_PATH", str(db)) + with TestClient(app) as c: + # The background worker is disabled so the canned-response queue + # is consumed only by the request path. 
The embedding worker + # stays "started" but its loop won't observe the captured + # enqueues — we replace ``enqueue`` on the worker instance below. + app.state.background_worker.enabled = False + yield c + app.dependency_overrides.clear() + + +def _seed(db_path: Path) -> None: + """Mirror of ``tests/test_turn_flow.py::_seed`` — single bot + chat + + edge + activities so the prompt assembler has something to render. + """ + with open_db(db_path) as conn: + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "thoughtful, observant", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "...", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="edge_update", + payload={ + "source_id": "bot_a", + "target_id": "you", + "chat_id": "chat_bot_a", + "knowledge_facts": ["coworker"], + }, + ) + for entity_id, verb in [("you", "talking"), ("bot_a", "listening")]: + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": entity_id, + "posture": "sitting", + "action": { + "verb": verb, + "interruptible": True, + "required_attention": "low", + "expected_duration": "ongoing", + }, + "attention": "", + "holding": [], + "status": {}, + }, + ) + project(conn) + + +def test_post_turn_embeddings_indexed_via_worker_hook( + app_state_setup, tmp_path +): + """POST a turn; the route must pass ``app=request.app`` into + ``record_turn_memory_for_present`` so the per-witness write enqueues + an :class:`EmbeddingJob` on ``app.state.embedding_worker``. + + Without the T97.5 wiring this test fails: the call site previously + omitted ``app=`` and the helper's ``app is None`` branch silently + skipped every enqueue. 
We monkeypatch ``enqueue`` on the live + embedding worker (rather than draining the queue mid-request) so the + assertion does not depend on asyncio scheduling inside the + TestClient — the bug is in the wiring, and the wiring is what we + pin. The drain path is covered separately in + :mod:`tests.test_embedding_worker`. + """ + _seed(tmp_path / "test.db") + + canned_parse = json.dumps( + {"segments": [{"kind": "dialogue", "text": "hello"}]} + ) + _override_llm( + [canned_parse, "Hi there.", _zero_state(), _zero_state()] + ) + + captured: list = [] + worker = app.state.embedding_worker + original_enqueue = worker.enqueue + worker.enqueue = captured.append # type: ignore[assignment] + try: + response = app_state_setup.post( + "/chats/chat_bot_a/turns", data={"prose": "hello"} + ) + assert response.status_code == 204 + finally: + worker.enqueue = original_enqueue # type: ignore[assignment] + app.dependency_overrides.clear() + + # Single-bot turn -> one ``memory_written`` -> one EmbeddingJob. + # The job's ``memory_id`` should match the freshly-projected memory + # row, and its ``text`` should carry the assistant's narrative text. + assert len(captured) == 1 + job = captured[0] + assert job.text == "Hi there." 
+ + with open_db(tmp_path / "test.db") as conn: + memory_ids = [ + r[0] + for r in conn.execute( + "SELECT id FROM memories WHERE owner_id = ?", + ("bot_a",), + ).fetchall() + ] + assert job.memory_id in memory_ids -- 2.52.0 From d39d31479dbfa9899515e6930fbc5447cf029d26 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:24:02 -0400 Subject: [PATCH 18/26] feat: drawer branching UI (T98.1) --- chat/templates/_drawer.html | 42 ++++++++ chat/web/drawer.py | 108 +++++++++++++++++++++ tests/test_drawer_phase4.py | 189 ++++++++++++++++++++++++++++++++++++ 3 files changed, 339 insertions(+) create mode 100644 tests/test_drawer_phase4.py diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 43a659a..621d4af 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -414,6 +414,48 @@ {% endif %} +
+

Branches

+ {% if branches %} +
    + {% for b in branches %} +
  • + {{ b.name }} + {% if b.is_active %} (active){% endif %} + · {{ b.event_count }} events + {% if not b.is_active %} +
    + + +
    + {% endif %} +
  • + {% endfor %} +
+ {% else %} +

No branches yet.

+ {% endif %} +
+ Create branch +
+ + + +
+
+
+

Pinned memories ({{ pinned|length }} / {{ pin_cap }})

{% if pinned %} diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 97f03cf..93c017d 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -36,7 +36,14 @@ from fastapi.responses import HTMLResponse from fastapi.templating import Jinja2Templates from chat.eventlog.log import append_and_apply +from chat.services.branching import ( + branch_from_event, + list_branches_with_metadata, + switch_active_branch, +) +from chat.services.delete_impact import compute_delete_impact from chat.services.relationship_seed import seed_inter_bot_edges +from chat.services.rewind import execute_rewind from chat.services.scene_summarize import apply_scene_close_summary from chat.state.edges import get_edge from chat.state.entities import get_bot, get_you, list_bots @@ -169,6 +176,11 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): active_events = list_active_events(conn, chat_id) open_threads = list_open_threads(conn, chat_id) + # T98.1: branch metadata (every chat sees the global branch list — branches + # may be chat-scoped or global, so :func:`list_branches_with_metadata` + # returns both flavours and the template highlights the active one). + branches = list_branches_with_metadata(conn, chat_id) + return TEMPLATES.TemplateResponse( request, "_drawer.html", @@ -196,6 +208,7 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): "pin_cap": PIN_CAP, "active_events": active_events, "open_threads": open_threads, + "branches": branches, }, ) @@ -1080,3 +1093,98 @@ async def close_thread( }, ) return await drawer(chat_id, request, conn) + + +# --- T98.1 branching UI -------------------------------------------------- +# +# Three POST endpoints wired to the Phase 4 :mod:`chat.services.branching` +# helpers. The drawer's "Branches" panel exposes: +# +# * Create from a free-form ``origin_event_id``. +# * Switch the active branch by name. 
+# * Convenience "branch from this turn" against a per-turn event_id (the +# chat surface stamps ``id="turn-"`` on every turn so users can +# pick the right one without copying ids by hand). +# +# All three return the refreshed drawer partial; failures from the service +# layer (duplicate name, unknown branch, invalid origin) surface as 400 so +# HTMX displays the inline error. + + +@router.post( + "/chats/{chat_id}/drawer/branch/create", + response_class=HTMLResponse, +) +async def create_branch( + chat_id: str, + request: Request, + name: str = Form(...), + origin_event_id: int = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + try: + branch_from_event( + conn, + name=name, + origin_event_id=int(origin_event_id), + chat_id=chat_id, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/branch/switch", + response_class=HTMLResponse, +) +async def switch_branch( + chat_id: str, + request: Request, + name: str = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + try: + switch_active_branch(conn, name=name) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/branch/from-turn/{event_id}", + response_class=HTMLResponse, +) +async def branch_from_turn( + chat_id: str, + event_id: int, + request: Request, + name: str = Form(...), + conn=Depends(get_conn), +): + """Convenience: branch from a specific turn event. 
+ + Identical to :func:`create_branch` except ``origin_event_id`` is + encoded in the URL — the chat surface renders one such form per turn + so users can fork mid-conversation without authoring an event id by + hand. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + try: + branch_from_event( + conn, + name=name, + origin_event_id=int(event_id), + chat_id=chat_id, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return await drawer(chat_id, request, conn) diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py new file mode 100644 index 0000000..3e0f875 --- /dev/null +++ b/tests/test_drawer_phase4.py @@ -0,0 +1,189 @@ +"""T98 (Phase 4): drawer phase-4 bundle. + +Five sub-features extending the chat drawer: + +* T98.1 — branching UI (create / switch / from-turn). +* T98.2 — significance-review panel (distribution + significance edits). +* T98.3 — hide-from-view toggle (per-turn, via ``manual_edit`` projector + branch ``turn_hidden``). +* T98.4 — surgical delete with cascade preview (preview modal + + rewind execution against a target turn). +* T98.5 — remaining v1 edits (chat narrative_anchor + weather). + +Tests follow the T59 pattern in ``tests/test_drawer_events_threads_skip.py`` +— a TestClient against the real FastAPI app with a per-test temp DB. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_and_apply, append_event +from chat.eventlog.projector import project + + +@pytest.fixture +def client(tmp_path, monkeypatch): + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + db = tmp_path / "test.db" + monkeypatch.setenv("CHAT_DB_PATH", str(db)) + with TestClient(app) as c: + if hasattr(app.state, "background_worker"): + app.state.background_worker.enabled = False + yield c + + +def _bot_payload(bot_id: str, name: str) -> dict: + return { + "id": bot_id, + "name": name, + "persona": "...", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + } + + +def _seed_chat(db: Path, *, with_scene: bool = True) -> int: + """Seed a chat hosted by ``bot_a``; return the latest event id (chat_created).""" + with open_db(db) as conn: + append_event(conn, kind="bot_authored", payload=_bot_payload("bot_a", "BotA")) + append_event( + conn, + kind="you_authored", + payload={"name": "Me", "pronouns": "they/them", "persona": ""}, + ) + chat_event_id = append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + if with_scene: + append_event( + conn, + kind="scene_opened", + payload={ + "chat_id": "chat_bot_a", + "container_id": None, + "started_at": "2026-04-26T20:00:00+00:00", + "participants": ["you", "bot_a"], + }, + ) + project(conn) + return chat_event_id + + +# --------------------------------------------------------------------------- +# T98.1 — branching UI. 
+# --------------------------------------------------------------------------- + + +def test_t98_1_create_branch_emits_branch_created_and_renders(client, tmp_path): + db = tmp_path / "test.db" + seed_id = _seed_chat(db) + + response = client.post( + "/chats/chat_bot_a/drawer/branch/create", + data={"name": "experiment_a", "origin_event_id": str(seed_id)}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + rows = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'branch_created'" + ).fetchone() + assert rows[0] == 1 + from chat.state.branches import get_branch + + b = get_branch(conn, "experiment_a") + assert b is not None + assert b["origin_event_id"] == seed_id + assert b["chat_id"] == "chat_bot_a" + + # Drawer partial lists the new branch. + body = response.text + assert "

Branches

" in body + assert "experiment_a" in body + + +def test_t98_1_switch_branch_marks_active_and_unknown_400s(client, tmp_path): + db = tmp_path / "test.db" + seed_id = _seed_chat(db) + + # Create branch directly via the service so this test focuses on switch. + with open_db(db) as conn: + from chat.services.branching import branch_from_event + + branch_from_event( + conn, name="experiment_b", origin_event_id=seed_id, chat_id="chat_bot_a" + ) + + response = client.post( + "/chats/chat_bot_a/drawer/branch/switch", + data={"name": "experiment_b"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + from chat.state.branches import active_branch + + active = active_branch(conn) + assert active is not None + assert active["name"] == "experiment_b" + + # Unknown branch -> 400. + bad = client.post( + "/chats/chat_bot_a/drawer/branch/switch", + data={"name": "ghost_branch"}, + ) + assert bad.status_code == 400 + + +def test_t98_1_branch_from_turn_emits_branch_created(client, tmp_path): + db = tmp_path / "test.db" + seed_id = _seed_chat(db) + + # Append an extra turn so we can branch from it specifically. + with open_db(db) as conn: + turn_id = append_event( + conn, + kind="user_turn", + payload={"chat_id": "chat_bot_a", "prose": "hi", "segments": []}, + ) + + response = client.post( + f"/chats/chat_bot_a/drawer/branch/from-turn/{turn_id}", + data={"name": "fork_at_turn"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + from chat.state.branches import get_branch + + b = get_branch(conn, "fork_at_turn") + assert b is not None + assert b["origin_event_id"] == turn_id + assert b["chat_id"] == "chat_bot_a" + + # Duplicate name -> 400 from service ValueError. 
+ dup = client.post( + f"/chats/chat_bot_a/drawer/branch/from-turn/{turn_id}", + data={"name": "fork_at_turn"}, + ) + assert dup.status_code == 400 + assert seed_id < turn_id # sanity: turn is after chat_created -- 2.52.0 From b25007eb44c21a8317195769cc32762f2a0e1d32 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:25:40 -0400 Subject: [PATCH 19/26] feat: drawer significance review panel (T98.2) --- chat/templates/_drawer.html | 46 ++++++++++++++++++ chat/web/drawer.py | 16 +++++++ tests/test_drawer_phase4.py | 93 +++++++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 621d4af..6c1b2c1 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -456,6 +456,52 @@
+
+

Significance review

+ {% set total_mem = significance_distribution.values()|sum %} + {% if total_mem %} +
    + {% for level in [0, 1, 2, 3] %} + {% set count = significance_distribution[level] %} + {% set marker = ['·','•','★','★★'][level] %} + {% set pct = (100 * count / total_mem)|round(0, 'floor')|int if total_mem else 0 %} +
  • + {{ marker }} ({{ level }}) + + {{ count }} +
  • + {% endfor %} +
+ {% else %} +

No memories yet.

+ {% endif %} + {% if recent_memories %} +
+ Edit significance (recent memories) +
    + {% for m in recent_memories %} +
  • + {{ ['·','•','★','★★'][m.significance|default(0)] }} + {{ m.pov_summary[:80] }}{% if m.pov_summary|length > 80 %}…{% endif %} +
    + + +
    +
  • + {% endfor %} +
+
+ {% endif %} +
+

Pinned memories ({{ pinned|length }} / {{ pin_cap }})

{% if pinned %} diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 93c017d..251e2ab 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -181,6 +181,21 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): # returns both flavours and the template highlights the active one). branches = list_branches_with_metadata(conn, chat_id) + # T98.2: significance distribution across this chat's memories. Powers + # the "Significance review" panel — a small histogram letting authors + # spot lopsided buckets (e.g. nothing significant=3 yet) and triage by + # editing individual memory significance values. + sig_rows = conn.execute( + "SELECT significance, COUNT(*) FROM memories " + "WHERE chat_id = ? GROUP BY significance ORDER BY significance", + (chat_id,), + ).fetchall() + significance_distribution = {int(r[0]): int(r[1]) for r in sig_rows} + # Ensure every bucket 0..3 is present so the bar-chart template can + # render a stable axis even when a level has zero rows. + for level in (0, 1, 2, 3): + significance_distribution.setdefault(level, 0) + return TEMPLATES.TemplateResponse( request, "_drawer.html", @@ -209,6 +224,7 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): "active_events": active_events, "open_threads": open_threads, "branches": branches, + "significance_distribution": significance_distribution, }, ) diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index 3e0f875..e20f01d 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -187,3 +187,96 @@ def test_t98_1_branch_from_turn_emits_branch_created(client, tmp_path): ) assert dup.status_code == 400 assert seed_id < turn_id # sanity: turn is after chat_created + + +# --------------------------------------------------------------------------- +# T98.2 — significance review panel. 
+# --------------------------------------------------------------------------- + + +def _seed_memories_for_significance(db: Path) -> list[int]: + """Seed three memories with significance levels 0, 1, 2. Returns ids. + + Uses ``append_and_apply`` (vs ``append_event`` + a final ``project``) + so each row is applied exactly once — the chat row was already + materialised by ``_seed_chat`` and a re-projection would conflict + on ``chats.id`` UNIQUE. + """ + ids: list[int] = [] + with open_db(db) as conn: + for sig in (0, 1, 2): + append_and_apply( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": f"memory at significance {sig}", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "significance": sig, + }, + ) + rows = conn.execute( + "SELECT id FROM memories WHERE chat_id = 'chat_bot_a' " + "ORDER BY id ASC" + ).fetchall() + ids = [int(r[0]) for r in rows] + return ids + + +def test_t98_2_distribution_renders_per_significance_bucket(client, tmp_path): + db = tmp_path / "test.db" + _seed_chat(db) + _seed_memories_for_significance(db) + + response = client.get("/chats/chat_bot_a/drawer") + assert response.status_code == 200 + body = response.text + + # Section heading + bar entries for each significance level. + assert "

Significance review

" in body + # All four buckets appear by their canonical label even when count=0. + assert ">★★ (3)<" in body or "(3)" in body + # The distribution markup names each level explicitly. + for level in (0, 1, 2, 3): + assert f"sig-bar sig-{level}" in body + # Three seeded memories (sigs 0, 1, 2) — each has a count = 1 bar. + # We don't pin exact text formatting, just verify the per-level bars + # are present. + + +def test_t98_2_edit_significance_via_existing_route_lands_manual_edit( + client, tmp_path +): + db = tmp_path / "test.db" + _seed_chat(db) + ids = _seed_memories_for_significance(db) + + target_id = ids[0] # initially significance=0 + response = client.post( + f"/chats/chat_bot_a/drawer/memory/{target_id}/significance", + data={"significance": "3"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + # Significance updated in the projected table. + row = conn.execute( + "SELECT significance FROM memories WHERE id = ?", (target_id,) + ).fetchone() + assert int(row[0]) == 3 + + # manual_edit landed in the event log with the prior snapshot. 
+ import json as _json + + log_rows = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' ORDER BY id DESC LIMIT 1" + ).fetchone() + payload = _json.loads(log_rows[0]) + assert payload["target_kind"] == "memory_significance" + assert int(payload["target_id"]) == target_id + assert payload["prior_value"] == 0 + assert payload["new_value"] == 3 -- 2.52.0 From 461d4410780639d4ff3e76181dbdff03cf8bbae0 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:27:59 -0400 Subject: [PATCH 20/26] feat: drawer hide-from-view toggle + turn_hidden manual_edit branch (T98.3) --- chat/state/manual_edit.py | 19 +++++++ chat/templates/_drawer.html | 27 ++++++++++ chat/web/drawer.py | 98 ++++++++++++++++++++++++++++++++++ tests/test_drawer_phase4.py | 103 ++++++++++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+) diff --git a/chat/state/manual_edit.py b/chat/state/manual_edit.py index 3bfff79..fdcc723 100644 --- a/chat/state/manual_edit.py +++ b/chat/state/manual_edit.py @@ -30,6 +30,14 @@ T72.3 adds a per-flag witness toggle: ``{"flag": "you"|"host"|"guest", "value": 0|1}`` and ``prior_value`` mirrors the same shape so an inverse edit can restore the flag. +T98.3 adds a hide-from-view toggle: +- ``turn_hidden`` — flip ``event_log.hidden`` on a single turn row. + Hidden turns are filtered by ``read_recent_dialogue`` (see + :mod:`chat.services.turn_common`) so they vanish from the prompt + without being deleted from the log. ``target_id`` is the integer + ``event_log.id`` of the turn; ``new_value`` is ``{"hidden": 0|1}`` + and ``prior_value`` mirrors the shape so an inverse edit restores it. + Pin toggles intentionally use the existing ``memory_pin_changed`` event (registered in :mod:`chat.state.memory`) rather than ``manual_edit`` so the projection writes both ``pinned`` and ``auto_pinned`` atomically. @@ -138,5 +146,16 @@ def _apply_manual_edit(conn: Connection, e: Event) -> None: f"UPDATE memories SET witness_{flag} = ? 
WHERE id = ?", (1 if int(new_value["value"]) else 0, int(target_id)), ) + elif kind == "turn_hidden": + # T98.3: hide-from-view toggle on a turn (event_log row). Sets + # ``event_log.hidden`` so :func:`read_recent_dialogue` (which + # filters ``hidden = 0``) drops the row from the prompt window + # without deleting it from the log. ``new_value`` is + # ``{"hidden": 0|1}``. + hidden_int = 1 if int(new_value.get("hidden", 0)) else 0 + conn.execute( + "UPDATE event_log SET hidden = ? WHERE id = ?", + (hidden_int, int(target_id)), + ) # Unknown target_kind: silently no-op for v1. Future kinds (activity # fields, etc.) extend the dispatch above. diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 6c1b2c1..8614a80 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -456,6 +456,33 @@
+
+

Recent turns

+ {% if recent_turns %} +
    + {% for t in recent_turns %} +
  • + #{{ t.event_id }} {{ t.kind }} + {{ t.speaker }}: + {{ t.excerpt }}{% if t.excerpt|length >= 120 %}…{% endif %} +
    + + +
    +
  • + {% endfor %} +
+ {% else %} +

No turns yet.

+ {% endif %} +
+

Significance review

{% set total_mem = significance_distribution.values()|sum %} diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 251e2ab..3a8a6d0 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -176,6 +176,43 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): active_events = list_active_events(conn, chat_id) open_threads = list_open_threads(conn, chat_id) + # T98.3: recent turns (user_turn / assistant_turn) for the hide-from-view + # panel. Includes ``hidden`` rows so the user can un-hide them — the + # filter on the read side (read_recent_dialogue) is what drops hidden + # rows from the prompt; the drawer panel always shows everything. + turn_rows = conn.execute( + """ + SELECT id, kind, payload_json, hidden + FROM event_log + WHERE kind IN ('user_turn', 'assistant_turn', 'user_turn_edit') + AND superseded_by IS NULL + ORDER BY id DESC + LIMIT ? + """, + (RECENT_LIMIT,), + ).fetchall() + recent_turns: list[dict] = [] + for row in turn_rows: + try: + payload = json.loads(row[2]) if row[2] else {} + except (json.JSONDecodeError, TypeError): + payload = {} + if payload.get("chat_id") != chat_id: + continue + text = payload.get("prose") or payload.get("text") or "" + speaker = payload.get("speaker_id") or ( + "you" if row[1].startswith("user") else "?" + ) + recent_turns.append( + { + "event_id": int(row[0]), + "kind": row[1], + "speaker": speaker, + "excerpt": (text or "").replace("\n", " ")[:120], + "hidden": bool(row[3]), + } + ) + # T98.1: branch metadata (every chat sees the global branch list — branches # may be chat-scoped or global, so :func:`list_branches_with_metadata` # returns both flavours and the template highlights the active one). 
@@ -225,6 +262,7 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): "open_threads": open_threads, "branches": branches, "significance_distribution": significance_distribution, + "recent_turns": recent_turns, }, ) @@ -1173,6 +1211,66 @@ async def switch_branch( return await drawer(chat_id, request, conn) +@router.post( + "/chats/{chat_id}/drawer/turn/hide/{event_id}", + response_class=HTMLResponse, +) +async def hide_turn( + chat_id: str, + event_id: int, + request: Request, + hidden: int = Form(...), + conn=Depends(get_conn), +): + """Toggle ``event_log.hidden`` on a turn via the ``turn_hidden`` + ``manual_edit`` projector branch. + + The route validates the target is an actual turn-shaped row in this + chat (so a stray click on the chat panel can't hide a system event) + and snapshots the prior ``hidden`` value for §6.4 reversibility. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + row = conn.execute( + "SELECT kind, payload_json, hidden FROM event_log WHERE id = ?", + (int(event_id),), + ).fetchone() + if row is None: + raise HTTPException( + status_code=404, detail=f"event not found: {event_id}" + ) + if row[0] not in ("user_turn", "assistant_turn", "user_turn_edit"): + raise HTTPException( + status_code=400, + detail=f"event {event_id} is not a turn (kind={row[0]})", + ) + try: + payload = json.loads(row[1]) if row[1] else {} + except (json.JSONDecodeError, TypeError): + payload = {} + if payload.get("chat_id") != chat_id: + raise HTTPException( + status_code=404, + detail=f"event {event_id} not in chat {chat_id}", + ) + + prior_hidden = 1 if int(row[2]) else 0 + new_hidden = 1 if int(hidden) else 0 + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "turn_hidden", + "target_id": int(event_id), + "prior_value": {"hidden": prior_hidden}, + "new_value": {"hidden": new_hidden}, + }, + ) + return await drawer(chat_id, 
request, conn) + + @router.post( "/chats/{chat_id}/drawer/branch/from-turn/{event_id}", response_class=HTMLResponse, diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index e20f01d..30f3336 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -280,3 +280,106 @@ def test_t98_2_edit_significance_via_existing_route_lands_manual_edit( assert int(payload["target_id"]) == target_id assert payload["prior_value"] == 0 assert payload["new_value"] == 3 + + +# --------------------------------------------------------------------------- +# T98.3 — hide-from-view toggle. +# --------------------------------------------------------------------------- + + +def _seed_turns(db: Path) -> tuple[int, int]: + """Append one user_turn + one assistant_turn; return their event ids.""" + with open_db(db) as conn: + user_id = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "How are you doing today?", + "segments": [], + }, + ) + bot_id = append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "Quite well, thanks for asking!", + "truncated": False, + "user_turn_id": user_id, + }, + ) + return user_id, bot_id + + +def test_t98_3_hide_turn_flips_event_log_hidden_via_manual_edit( + client, tmp_path +): + db = tmp_path / "test.db" + _seed_chat(db) + user_id, bot_id = _seed_turns(db) + + response = client.post( + f"/chats/chat_bot_a/drawer/turn/hide/{user_id}", + data={"hidden": "1"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + # event_log.hidden flipped to 1. + row = conn.execute( + "SELECT hidden FROM event_log WHERE id = ?", (user_id,) + ).fetchone() + assert int(row[0]) == 1 + + # manual_edit landed with the prior snapshot. 
+ import json as _json + + log = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' ORDER BY id DESC LIMIT 1" + ).fetchone() + payload = _json.loads(log[0]) + assert payload["target_kind"] == "turn_hidden" + assert int(payload["target_id"]) == user_id + assert payload["prior_value"] == {"hidden": 0} + assert payload["new_value"] == {"hidden": 1} + + +def test_t98_3_hidden_turn_disappears_from_read_recent_dialogue( + client, tmp_path +): + """Hiding a turn must drop it from the prompt-window read. + + ``read_recent_dialogue`` (chat.services.turn_common) filters + ``hidden = 0`` server-side, so flipping the flag via the drawer + route must surface immediately. + """ + db = tmp_path / "test.db" + _seed_chat(db) + user_id, bot_id = _seed_turns(db) + + # Sanity baseline — both turns visible before the hide. + with open_db(db) as conn: + from chat.services.turn_common import read_recent_dialogue + + before = read_recent_dialogue(conn, "chat_bot_a", limit=10) + before_ids = [t["event_id"] for t in before] + assert user_id in before_ids + assert bot_id in before_ids + + # Hide the user turn via the drawer route. 
+ response = client.post( + f"/chats/chat_bot_a/drawer/turn/hide/{user_id}", + data={"hidden": "1"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + from chat.services.turn_common import read_recent_dialogue + + after = read_recent_dialogue(conn, "chat_bot_a", limit=10) + after_ids = [t["event_id"] for t in after] + assert user_id not in after_ids + assert bot_id in after_ids # the unhidden bot turn still surfaces -- 2.52.0 From c4fa11fe787fe2dd2dc4b229812641a2170cb7b0 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:29:07 -0400 Subject: [PATCH 21/26] feat: drawer surgical delete with cascade preview (T98.4) --- chat/web/drawer.py | 87 +++++++++++++++++++++++++++++++++++++ tests/test_drawer_phase4.py | 73 +++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 3a8a6d0..9ac1ab8 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -1211,6 +1211,93 @@ async def switch_branch( return await drawer(chat_id, request, conn) +@router.get( + "/chats/{chat_id}/drawer/turn/delete-preview/{event_id}", + response_class=HTMLResponse, +) +async def delete_preview( + chat_id: str, + event_id: int, + request: Request, + conn=Depends(get_conn), +): + """Render an :class:`ImpactReport` for ``event_id`` as a small modal. + + Read-only — :func:`compute_delete_impact` does not mutate the + database. The modal contains a confirmation form posting to + :func:`delete_turn` below; HTMX swaps the fragment into a modal + target on the chat page. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + report = compute_delete_impact(conn, target_event_id=int(event_id)) + + # Build the modal HTML directly — the impact report is small and + # reusing the drawer template would require a fragment include just + # for this surface. 
Mirrors the rewind-preview style in + # :func:`chat.web.turns.rewind_preview`. + items_html = "".join( + f"
  • {item.kind}: {item.description}
  • " + for item in report.cascading + ) + notes_html = "".join(f"
  • {note}
  • " for note in report.notes) + body = ( + "
    " + f"

    Delete event {report.target_event_id}?

    " + f"

    This will discard {len(report.cascading)} events. Cascade:

    " + f"
      {items_html or '
    • none
    • '}
    " + f"
      {notes_html}
    " + f"
    " + "" + "
    " + "
    " + ) + return HTMLResponse(body) + + +@router.post( + "/chats/{chat_id}/drawer/turn/delete/{event_id}", + response_class=HTMLResponse, +) +async def delete_turn( + chat_id: str, + event_id: int, + request: Request, + conn=Depends(get_conn), +): + """Delete a turn (and everything after) by invoking the existing rewind path. + + The :func:`chat.services.rewind.execute_rewind` API takes + ``after_event_id``: it removes events with id strictly greater than + that argument. To make ``event_id`` itself disappear we pass + ``after_event_id = event_id - 1`` — a thin adapter, not a + re-implementation of rewind. + + A snapshot is taken before truncation (inside ``execute_rewind``) + so the user can recover via the snapshot index. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + settings = request.app.state.settings + execute_rewind( + db_path=settings.db_path, + data_dir=settings.data_dir, + after_event_id=int(event_id) - 1, + ) + # ``conn`` is now stale (the rewind opened its own connection and + # truncated/reprojected). Re-render the drawer through a fresh open + # so the partial reflects the truncated state. + from chat.db.connection import open_db + + with open_db(settings.db_path) as fresh: + return await drawer(chat_id, request, fresh) + + @router.post( "/chats/{chat_id}/drawer/turn/hide/{event_id}", response_class=HTMLResponse, diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index 30f3336..9ec4a66 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -383,3 +383,76 @@ def test_t98_3_hidden_turn_disappears_from_read_recent_dialogue( after_ids = [t["event_id"] for t in after] assert user_id not in after_ids assert bot_id in after_ids # the unhidden bot turn still surfaces + + +# --------------------------------------------------------------------------- +# T98.4 — surgical delete with cascade preview. 
+# --------------------------------------------------------------------------- + + +def test_t98_4_delete_preview_returns_impact_report_html(client, tmp_path): + db = tmp_path / "test.db" + _seed_chat(db) + user_id, bot_id = _seed_turns(db) + + response = client.get( + f"/chats/chat_bot_a/drawer/turn/delete-preview/{user_id}" + ) + assert response.status_code == 200 + body = response.text + + # Modal markup with the event id and the cascade list. + assert "delete-impact-modal" in body + assert f"Delete event {user_id}?" in body + assert "delete-impact-cascade" in body + # Both turns ride along in the cascade — user_turn at user_id, then + # the assistant_turn at bot_id (>= user_id). + assert "user_turn" in body + assert "assistant_turn" in body + # Confirm-form posts to the delete route. + assert f"/drawer/turn/delete/{user_id}" in body + + +def test_t98_4_delete_invokes_rewind_and_drops_cascade(client, tmp_path): + db = tmp_path / "test.db" + _seed_chat(db) + user_id, bot_id = _seed_turns(db) + + # Append a third turn after the assistant_turn so we can verify the + # cascade catches everything from user_id forward. + with open_db(db) as conn: + extra_id = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "follow-up", + "segments": [], + }, + ) + + # Sanity: all three turn rows exist. + with open_db(db) as conn: + turn_count = conn.execute( + "SELECT COUNT(*) FROM event_log " + "WHERE kind IN ('user_turn', 'assistant_turn')" + ).fetchone()[0] + assert turn_count == 3 + + # Delete from user_id forward. + response = client.post(f"/chats/chat_bot_a/drawer/turn/delete/{user_id}") + assert response.status_code == 200 + + # All three turns are gone — the rewind truncated the log past + # user_id - 1, removing user_id, bot_id, and extra_id. 
+ with open_db(db) as conn: + turn_count = conn.execute( + "SELECT COUNT(*) FROM event_log " + "WHERE kind IN ('user_turn', 'assistant_turn')" + ).fetchone()[0] + assert turn_count == 0 + for ev_id in (user_id, bot_id, extra_id): + row = conn.execute( + "SELECT 1 FROM event_log WHERE id = ?", (ev_id,) + ).fetchone() + assert row is None, f"event {ev_id} should have been deleted" -- 2.52.0 From 4546bc0d9c820ee3088ecfad538fc56638626c81 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:35:54 -0400 Subject: [PATCH 22/26] feat: drawer remaining v1 field edits (T98.5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit of chat/state/manual_edit.py target_kind dispatch found two §6.4 fields without drawer affordances despite being already-projected text columns: chat_state.narrative_anchor and chat_state.weather. Both land via new manual_edit branches (target_kind chat_narrative_anchor and chat_weather) plus paired drawer routes and Scene-section text inputs. The container properties_json blob is intentionally deferred — bounded JSON edits aren't wired through manual_edit and the drawer never surfaces multiple containers at once, so v1 leaves it out. --- chat/state/manual_edit.py | 19 ++++++++ chat/templates/_drawer.html | 20 +++++++++ chat/web/drawer.py | 87 +++++++++++++++++++++++++++++++++++++ tests/test_drawer_phase4.py | 65 +++++++++++++++++++++++++++ 4 files changed, 191 insertions(+) diff --git a/chat/state/manual_edit.py b/chat/state/manual_edit.py index fdcc723..049b4ca 100644 --- a/chat/state/manual_edit.py +++ b/chat/state/manual_edit.py @@ -38,6 +38,12 @@ T98.3 adds a hide-from-view toggle: ``event_log.id`` of the turn; ``new_value`` is ``{"hidden": 0|1}`` and ``prior_value`` mirrors the shape so an inverse edit restores it. 
+T98.5 finishes the v1 drawer surface with two chat-scope text edits: +- ``chat_narrative_anchor`` and ``chat_weather`` — string overwrites of + the matching ``chat_state`` columns. ``target_id`` is the chat id + (``chats.id``); ``new_value`` is the new string and ``prior_value`` + carries the previous content for §6.4 reversibility. + Pin toggles intentionally use the existing ``memory_pin_changed`` event (registered in :mod:`chat.state.memory`) rather than ``manual_edit`` so the projection writes both ``pinned`` and ``auto_pinned`` atomically. @@ -157,5 +163,18 @@ def _apply_manual_edit(conn: Connection, e: Event) -> None: "UPDATE event_log SET hidden = ? WHERE id = ?", (hidden_int, int(target_id)), ) + elif kind == "chat_narrative_anchor": + # T98.5: string overwrite of ``chat_state.narrative_anchor`` for + # the chat keyed by ``target_id``. + conn.execute( + "UPDATE chat_state SET narrative_anchor = ? WHERE chat_id = ?", + (str(new_value), str(target_id)), + ) + elif kind == "chat_weather": + # T98.5: string overwrite of ``chat_state.weather``. + conn.execute( + "UPDATE chat_state SET weather = ? WHERE chat_id = ?", + (str(new_value), str(target_id)), + ) # Unknown target_kind: silently no-op for v1. Future kinds (activity # fields, etc.) extend the dispatch above. diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 8614a80..8cfdd5f 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -16,6 +16,26 @@

    No active container.

    {% endif %}

    Time: {{ chat.time }}

    +
    + + +
    +
    + + +
    {% if scene %}
    CHAT_NARRATIVE_ANCHOR_MAX: + raise HTTPException( + status_code=400, + detail=( + f"narrative_anchor exceeds {CHAT_NARRATIVE_ANCHOR_MAX} chars " + f"(got {len(new_value)})" + ), + ) + + prior = chat.get("narrative_anchor") or "" + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "chat_narrative_anchor", + "target_id": chat_id, + "prior_value": prior, + "new_value": new_value, + }, + ) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/chat/weather", + response_class=HTMLResponse, +) +async def edit_chat_weather( + chat_id: str, + request: Request, + new_value: str = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + if len(new_value) > CHAT_WEATHER_MAX: + raise HTTPException( + status_code=400, + detail=( + f"weather exceeds {CHAT_WEATHER_MAX} chars " + f"(got {len(new_value)})" + ), + ) + + prior = chat.get("weather") or "" + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "chat_weather", + "target_id": chat_id, + "prior_value": prior, + "new_value": new_value, + }, + ) + return await drawer(chat_id, request, conn) + + @router.post( "/chats/{chat_id}/drawer/branch/from-turn/{event_id}", response_class=HTMLResponse, diff --git a/tests/test_drawer_phase4.py b/tests/test_drawer_phase4.py index 9ec4a66..f94f266 100644 --- a/tests/test_drawer_phase4.py +++ b/tests/test_drawer_phase4.py @@ -456,3 +456,68 @@ def test_t98_4_delete_invokes_rewind_and_drops_cascade(client, tmp_path): "SELECT 1 FROM event_log WHERE id = ?", (ev_id,) ).fetchone() assert row is None, f"event {ev_id} should have been deleted" + + +# --------------------------------------------------------------------------- +# T98.5 — remaining v1 edits (chat narrative anchor + weather). 
+# --------------------------------------------------------------------------- + + +def test_t98_5_edit_chat_narrative_anchor_emits_manual_edit(client, tmp_path): + db = tmp_path / "test.db" + _seed_chat(db) + + response = client.post( + "/chats/chat_bot_a/drawer/chat/narrative-anchor", + data={"new_value": "Late evening, after dinner"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + row = conn.execute( + "SELECT narrative_anchor FROM chat_state WHERE chat_id = ?", + ("chat_bot_a",), + ).fetchone() + assert row[0] == "Late evening, after dinner" + + import json as _json + + log = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' ORDER BY id DESC LIMIT 1" + ).fetchone() + payload = _json.loads(log[0]) + assert payload["target_kind"] == "chat_narrative_anchor" + assert payload["target_id"] == "chat_bot_a" + assert payload["prior_value"] == "Day 1" + assert payload["new_value"] == "Late evening, after dinner" + + +def test_t98_5_edit_chat_weather_emits_manual_edit(client, tmp_path): + db = tmp_path / "test.db" + _seed_chat(db) + + response = client.post( + "/chats/chat_bot_a/drawer/chat/weather", + data={"new_value": "thunderstorm rolling in"}, + ) + assert response.status_code == 200 + + with open_db(db) as conn: + row = conn.execute( + "SELECT weather FROM chat_state WHERE chat_id = ?", + ("chat_bot_a",), + ).fetchone() + assert row[0] == "thunderstorm rolling in" + + import json as _json + + log = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' ORDER BY id DESC LIMIT 1" + ).fetchone() + payload = _json.loads(log[0]) + assert payload["target_kind"] == "chat_weather" + assert payload["target_id"] == "chat_bot_a" + assert payload["prior_value"] == "" + assert payload["new_value"] == "thunderstorm rolling in" -- 2.52.0 From a5f0e69d44dd186a81b98db7120f998eac05059d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:46:49 -0400 Subject: [PATCH 23/26] feat: 
snapshot UX (manual trigger + list + restore + preview) (T99) --- chat/app.py | 2 + chat/templates/layout.html | 1 + chat/templates/snapshots.html | 66 ++++++++++++ chat/web/snapshots.py | 190 ++++++++++++++++++++++++++++++++++ tests/test_snapshot_ux.py | 182 ++++++++++++++++++++++++++++++++ 5 files changed, 441 insertions(+) create mode 100644 chat/templates/snapshots.html create mode 100644 chat/web/snapshots.py create mode 100644 tests/test_snapshot_ux.py diff --git a/chat/app.py b/chat/app.py index 9e2c74b..0ae516c 100644 --- a/chat/app.py +++ b/chat/app.py @@ -33,6 +33,7 @@ from chat.web.kickoff import router as kickoff_router from chat.web.middleware import FirstRunRedirectMiddleware from chat.web.nav import router as nav_router from chat.web.settings import router as settings_router +from chat.web.snapshots import router as snapshots_router from chat.web.sse import router as sse_router from chat.web.turns import router as turns_router @@ -137,6 +138,7 @@ async def http_exception_handler(request: Request, exc: StarletteHTTPException): app.include_router(bots_router) app.include_router(kickoff_router) app.include_router(settings_router) +app.include_router(snapshots_router) app.include_router(nav_router) app.include_router(chat_router) app.include_router(drawer_router) diff --git a/chat/templates/layout.html b/chat/templates/layout.html index 7b1954b..197a39b 100644 --- a/chat/templates/layout.html +++ b/chat/templates/layout.html @@ -5,6 +5,7 @@ diff --git a/chat/templates/snapshots.html b/chat/templates/snapshots.html new file mode 100644 index 0000000..6039c16 --- /dev/null +++ b/chat/templates/snapshots.html @@ -0,0 +1,66 @@ +{% extends "layout.html" %} +{% block title %}Snapshots - chat{% endblock %} +{% block content %} + + +{% if preview %} +
    +

    Preview: {{ preview.snapshot_id }}

    +
    +
    kind
    {{ preview.kind }}
    +
    filename
    {{ preview.filename }}
    +
    file size (bytes)
    {{ preview.file_size_bytes }}
    +
    snapshot last_event_id
    {{ preview.last_event_id }}
    +
    current event_log max id
    {{ preview.current_event_log_max_id }}
    +
    events since snapshot
    {{ preview.event_delta }}
    +
    events stored in snapshot
    {{ preview.event_log_rows_in_snapshot }}
    +
    +
    +{% endif %} + +{% if snapshots %} + + + + + + + + + + + + + {% for snap in snapshots %} + + + + + + + + + {% endfor %} + +
    IDKindCreated (UTC)Size (bytes)last_event_idActions
    {{ snap.snapshot_id }}{{ snap.kind }}{{ snap.created_at }}{{ snap.file_size_bytes }}{{ snap.last_event_id if snap.last_event_id is not none else '?' }} + Preview +
    + Restore +
    + + + +
    +
    +
    +{% else %} +

    No snapshots yet. Use "Take snapshot now" to create one.

    +{% endif %} +{% endblock %} diff --git a/chat/web/snapshots.py b/chat/web/snapshots.py new file mode 100644 index 0000000..ae3cc30 --- /dev/null +++ b/chat/web/snapshots.py @@ -0,0 +1,190 @@ +"""Snapshot UX routes (T99). + +Surfaces the existing snapshot service (``chat/services/snapshot.py``) +through HTML so the user can see, take, restore, and preview snapshots +without dropping to a shell. + +Routes: + +* ``GET /snapshots`` list all snapshots (both kinds) +* ``POST /snapshots/take`` take a periodic snapshot now +* ``POST /snapshots/restore/{id}`` restore (requires matching ``confirm_id``) +* ``GET /snapshots/{id}/preview`` show metadata + delta vs current + +The ``snapshot_id`` is the filename stem (the UTC timestamp written by +:func:`chat.services.snapshot.take_snapshot`) — there's no separate UUID, +and the timestamp filename is already unique per snapshot kind. Both +periodic and rewind snapshots share the same id space lookup-wise, so +the restore + preview routes accept ``kind`` as a form/query param to +disambiguate. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from fastapi import APIRouter, Depends, Form, HTTPException, Request +from fastapi.responses import HTMLResponse, RedirectResponse +from fastapi.templating import Jinja2Templates + +from chat.services.snapshot import ( + restore_from_snapshot, + take_snapshot, +) +from chat.web.bots import get_conn + +TEMPLATES = Jinja2Templates( + directory=str(Path(__file__).resolve().parent.parent / "templates") +) + +router = APIRouter() + +SNAPSHOT_KINDS = ("periodic", "rewind") + + +def _list_all_snapshots(data_dir: Path) -> list[dict]: + """Walk ``data/snapshots/{kind}/`` for both kinds and collect metadata. 
+ + Each entry exposes the fields the template needs: ``snapshot_id`` + (filename stem), ``kind``, ``created_at`` (file mtime as ISO), the + on-disk ``file_size_bytes``, and the snapshot's stored + ``last_event_id`` (parsed from the JSON body — small enough that + listing isn't a performance concern for the handful of files we keep). + """ + from datetime import datetime, timezone + + rows: list[dict] = [] + for kind in SNAPSHOT_KINDS: + snap_dir = data_dir / "snapshots" / kind + if not snap_dir.exists(): + continue + for path in sorted(snap_dir.glob("*.json")): + try: + dump = json.loads(path.read_text()) + last_event_id = dump.get("last_event_id", 0) + except (OSError, json.JSONDecodeError): + # Corrupt or unreadable files still get listed so the + # user can see and delete them; just don't crash here. + last_event_id = None + stat = path.stat() + rows.append( + { + "snapshot_id": path.stem, + "kind": kind, + "created_at": datetime.fromtimestamp( + stat.st_mtime, tz=timezone.utc + ).isoformat(), + "file_size_bytes": stat.st_size, + "last_event_id": last_event_id, + "filename": path.name, + } + ) + # Newest first for display. 
+ rows.sort(key=lambda r: r["created_at"], reverse=True) + return rows + + +def _resolve_snapshot_path( + data_dir: Path, snapshot_id: str, kind: str +) -> Path: + """Map an ``(id, kind)`` pair to the on-disk file, or 404.""" + if kind not in SNAPSHOT_KINDS: + raise HTTPException(status_code=400, detail=f"unknown kind: {kind}") + path = data_dir / "snapshots" / kind / f"{snapshot_id}.json" + if not path.exists(): + raise HTTPException(status_code=404, detail="snapshot not found") + return path + + +@router.get("/snapshots", response_class=HTMLResponse) +async def snapshots_list(request: Request): + settings = request.app.state.settings + rows = _list_all_snapshots(settings.data_dir) + return TEMPLATES.TemplateResponse( + request, + "snapshots.html", + {"snapshots": rows, "active_nav": "snapshots"}, + ) + + +@router.post("/snapshots/take") +async def snapshots_take(request: Request, conn=Depends(get_conn)): + """Take a periodic snapshot now. + + We use ``kind="periodic"`` for manual snapshots since they're + user-initiated checkpoints, not pre-rewind safety dumps. They count + against the 5-snapshot retention but that's fine — manual ones are + the most recent so they're the last to be pruned. + """ + settings = request.app.state.settings + take_snapshot(conn, data_dir=settings.data_dir, kind="periodic") + return RedirectResponse(url="/snapshots", status_code=303) + + +@router.post("/snapshots/restore/{snapshot_id}") +async def snapshots_restore( + snapshot_id: str, + request: Request, + confirm_id: str = Form(""), + kind: str = Form("periodic"), + conn=Depends(get_conn), +): + """Hard-confirm restore: ``confirm_id`` must equal the path id. + + Mismatched confirm → 400 (without touching the DB). On match, the + existing :func:`restore_from_snapshot` clears projected tables and + re-loads them from the dump. 
+ """ + if confirm_id != snapshot_id: + raise HTTPException( + status_code=400, + detail="confirm_id does not match snapshot id", + ) + settings = request.app.state.settings + path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind) + restore_from_snapshot(conn, path) + return RedirectResponse(url="/snapshots", status_code=303) + + +@router.get("/snapshots/{snapshot_id}/preview", response_class=HTMLResponse) +async def snapshots_preview( + snapshot_id: str, + request: Request, + kind: str = "periodic", + conn=Depends(get_conn), +): + """Show snapshot metadata + a basic delta against the current event log. + + Phase 4 keeps this simple: the snapshot's ``last_event_id`` plus the + current ``MAX(event_log.id)`` is enough to tell the user how far the + log has moved on. A richer per-table diff is a Phase 4.5+ concern. + """ + settings = request.app.state.settings + path = _resolve_snapshot_path(settings.data_dir, snapshot_id, kind) + dump = json.loads(path.read_text()) + last_event_id = dump.get("last_event_id", 0) + + cur = conn.execute("SELECT MAX(id) FROM event_log") + row = cur.fetchone() + current_max_id = row[0] if row[0] is not None else 0 + + stat = path.stat() + return TEMPLATES.TemplateResponse( + request, + "snapshots.html", + { + "snapshots": _list_all_snapshots(settings.data_dir), + "active_nav": "snapshots", + "preview": { + "snapshot_id": snapshot_id, + "kind": kind, + "filename": path.name, + "file_size_bytes": stat.st_size, + "last_event_id": last_event_id, + "current_event_log_max_id": current_max_id, + "event_delta": current_max_id - last_event_id, + "event_log_rows_in_snapshot": len(dump.get("event_log", [])), + }, + }, + ) diff --git a/tests/test_snapshot_ux.py b/tests/test_snapshot_ux.py new file mode 100644 index 0000000..347f9ce --- /dev/null +++ b/tests/test_snapshot_ux.py @@ -0,0 +1,182 @@ +"""Tests for Task 99 — snapshot UX (manual trigger + list + restore + preview). 
+ +Phase 4 surfaces the existing snapshot infrastructure (Phase 1 T20 / T31) +through HTML routes so the user can: + +* see what snapshots exist, +* take one on demand, +* restore one with a hard confirm, +* peek at metadata before restoring. + +The underlying service API lives in ``chat/services/snapshot.py`` and is +already exercised by ``test_snapshot.py``; here we only verify the web +surface wires the existing functions correctly. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_event +from chat.eventlog.projector import project + + +def _bot_payload(bot_id: str, name: str) -> dict: + return { + "id": bot_id, + "name": name, + "persona": "fancy", + "voice_samples": ["sample"], + "traits": ["shy"], + "backstory": "", + "initial_relationship_to_you": "coworker", + "kickoff_prose": "", + } + + +@pytest.fixture +def client(tmp_path, monkeypatch): + """A TestClient whose db + data_dir live under ``tmp_path``. + + ``load_settings`` derives ``data_dir`` from ``CHAT_DB_PATH``'s parent + when ``CHAT_DATA_DIR`` is unset (see ``chat/config.py``), so this also + isolates the ``data/snapshots/`` tree to ``tmp_path``. 
+ """ + config_path = tmp_path / "config.toml" + config_path.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(config_path)) + monkeypatch.setenv("CHAT_DB_PATH", str(tmp_path / "test.db")) + + with TestClient(app) as c: + c.tmp_path = tmp_path # type: ignore[attr-defined] + yield c + + +def _seed_bot(db_path: Path, bot_id: str = "bot_a", name: str = "BotA") -> None: + with open_db(db_path) as conn: + append_event(conn, kind="bot_authored", payload=_bot_payload(bot_id, name)) + project(conn) + + +def _take_snapshot_via_service( + db_path: Path, data_dir: Path, kind: str = "periodic" +) -> Path: + from chat.services.snapshot import take_snapshot + + with open_db(db_path) as conn: + return take_snapshot(conn, data_dir=data_dir, kind=kind) + + +def test_list_snapshots_renders_page(client, tmp_path): + _seed_bot(tmp_path / "test.db", "bot_a", "BotA") + # Take two snapshots through the service so the listing has rows. + p1 = _take_snapshot_via_service(tmp_path / "test.db", tmp_path, kind="periodic") + p2 = _take_snapshot_via_service(tmp_path / "test.db", tmp_path, kind="rewind") + + response = client.get("/snapshots") + assert response.status_code == 200 + body = response.text + # Both filenames should appear in the listing. + assert p1.stem in body + assert p2.stem in body + # Both kinds should be visible. 
+ assert "periodic" in body + assert "rewind" in body + + +def test_take_snapshot_creates_new(client, tmp_path): + _seed_bot(tmp_path / "test.db", "bot_a", "BotA") + snapshot_dir = tmp_path / "snapshots" / "periodic" + + before = ( + len(list(snapshot_dir.glob("*.json"))) if snapshot_dir.exists() else 0 + ) + response = client.post("/snapshots/take", follow_redirects=False) + assert response.status_code == 303 + assert response.headers["location"] == "/snapshots" + + after = len(list(snapshot_dir.glob("*.json"))) + assert after == before + 1 + + +def test_restore_snapshot_with_correct_confirm(client, tmp_path): + db_path = tmp_path / "test.db" + _seed_bot(db_path, "bot_a", "BotA") + snapshot_path = _take_snapshot_via_service( + db_path, tmp_path, kind="periodic" + ) + snapshot_id = snapshot_path.stem # filename without extension + + # Mutate the DB after the snapshot was taken — restoring should erase + # the new bot. + with open_db(db_path) as conn: + append_event( + conn, kind="bot_authored", payload=_bot_payload("bot_b", "BotB") + ) + project(conn) + bots_before = conn.execute( + "SELECT id FROM bots ORDER BY id" + ).fetchall() + assert {r[0] for r in bots_before} == {"bot_a", "bot_b"} + + response = client.post( + f"/snapshots/restore/{snapshot_id}", + data={"confirm_id": snapshot_id, "kind": "periodic"}, + follow_redirects=False, + ) + assert response.status_code == 303 + + with open_db(db_path) as conn: + bots_after = conn.execute( + "SELECT id FROM bots ORDER BY id" + ).fetchall() + # The post-snapshot bot should be gone. 
+ assert {r[0] for r in bots_after} == {"bot_a"} + + +def test_restore_snapshot_wrong_confirm_400(client, tmp_path): + db_path = tmp_path / "test.db" + _seed_bot(db_path, "bot_a", "BotA") + snapshot_path = _take_snapshot_via_service( + db_path, tmp_path, kind="periodic" + ) + snapshot_id = snapshot_path.stem + + response = client.post( + f"/snapshots/restore/{snapshot_id}", + data={"confirm_id": "not_the_right_id", "kind": "periodic"}, + follow_redirects=False, + ) + assert response.status_code == 400 + + +def test_preview_renders_metadata(client, tmp_path): + db_path = tmp_path / "test.db" + _seed_bot(db_path, "bot_a", "BotA") + snapshot_path = _take_snapshot_via_service( + db_path, tmp_path, kind="periodic" + ) + snapshot_id = snapshot_path.stem + + # Append more events post-snapshot so the delta is non-zero. + with open_db(db_path) as conn: + append_event( + conn, kind="bot_authored", payload=_bot_payload("bot_b", "BotB") + ) + project(conn) + + response = client.get( + f"/snapshots/{snapshot_id}/preview", params={"kind": "periodic"} + ) + assert response.status_code == 200 + body = response.text + assert snapshot_id in body + # Snapshot's last_event_id and current event_log size should appear. + dump = json.loads(snapshot_path.read_text()) + assert str(dump["last_event_id"]) in body -- 2.52.0 From 0a2c5924f97d18ee947c56e4d6b623c587b1173a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:46:52 -0400 Subject: [PATCH 24/26] feat: cross-chat search UX (top-bar + results page) (T100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires T93's `search_all_memories` service into a small read-only HTML surface so users can find a memory across every chat in the database. * `chat/web/search.py` (new): GET `/search?q=...` runs the FTS service with k=50, hydrates each row with bot name + scene timestamp, and renders `search.html`. 
Empty `q` short-circuits to no results so the top-bar form can submit even with an empty input. * `chat/templates/search.html` (new): empty-state placeholder, results list with chat-level "Open chat" links (`/chats/{chat_id}` — memories don't carry an event_id today, so no per-turn anchor). * `chat/templates/layout.html`: append a small `
    ` to the rail nav, additive only. * `chat/app.py`: register `search_router` (additive import + include). * `tests/test_search_ux.py`: 3 tests — multi-chat results, empty-query placeholder, chat link. --- chat/app.py | 2 + chat/templates/layout.html | 7 ++ chat/templates/search.html | 37 ++++++++++ chat/web/search.py | 92 +++++++++++++++++++++++++ tests/test_search_ux.py | 135 +++++++++++++++++++++++++++++++++++++ 5 files changed, 273 insertions(+) create mode 100644 chat/templates/search.html create mode 100644 chat/web/search.py create mode 100644 tests/test_search_ux.py diff --git a/chat/app.py b/chat/app.py index 9e2c74b..a5d66dc 100644 --- a/chat/app.py +++ b/chat/app.py @@ -32,6 +32,7 @@ from chat.web.drawer import router as drawer_router from chat.web.kickoff import router as kickoff_router from chat.web.middleware import FirstRunRedirectMiddleware from chat.web.nav import router as nav_router +from chat.web.search import router as search_router from chat.web.settings import router as settings_router from chat.web.sse import router as sse_router from chat.web.turns import router as turns_router @@ -140,6 +141,7 @@ app.include_router(settings_router) app.include_router(nav_router) app.include_router(chat_router) app.include_router(drawer_router) +app.include_router(search_router) app.include_router(sse_router) app.include_router(turns_router) diff --git a/chat/templates/layout.html b/chat/templates/layout.html index 7b1954b..5ccb7c9 100644 --- a/chat/templates/layout.html +++ b/chat/templates/layout.html @@ -7,6 +7,13 @@
  • Bots
  • Settings
  • + {# T100: cross-chat search box. GET /search so the URL is shareable + and back-button friendly; the results page itself re-renders this + form with the query pre-filled. #} + + + +
    {% block content %}{% endblock %} diff --git a/chat/templates/search.html b/chat/templates/search.html new file mode 100644 index 0000000..ee61c24 --- /dev/null +++ b/chat/templates/search.html @@ -0,0 +1,37 @@ +{% extends "layout.html" %} +{% block title %}Search - chat{% endblock %} +{% block content %} + + +
    + + +
    + +{% if not query %} + {# Empty-state placeholder: the top-bar form submits to /search even + with no input, so this page must render cleanly with no query. #} +

    Enter a query to search memories across all chats.

    +{% elif not results %} +

    No matches for “{{ query }}”.

    +{% else %} + +{% endif %} +{% endblock %} diff --git a/chat/web/search.py b/chat/web/search.py new file mode 100644 index 0000000..51d75ea --- /dev/null +++ b/chat/web/search.py @@ -0,0 +1,92 @@ +"""T100 (Phase 4): cross-chat search UX route. + +Wraps T93's :func:`chat.services.cross_chat_search.search_all_memories` +in a small read-only HTML surface so the top-bar search input has +somewhere to land. The route does no filtering of its own beyond the +empty-query fast-path that T93 already implements; ranking, owner +scope, and witness scope all live in the service layer. + +For each match we hydrate just enough metadata to render a row: +* the owner bot's display name (so users see "BOTA" not "bot_a"), +* the originating ``chat_id`` (the link target — there's no per-turn + anchor today because memories don't carry an ``event_id`` column, + so we deep-link to the chat as a whole), +* the originating scene title when one exists, +* and the ``pov_summary`` itself. + +We deliberately keep this module synchronous and template-only — no +HTMX swaps, no JSON API — because the search box is a "leave the +current chat to look something up" surface, not an inline drawer. +""" + +from __future__ import annotations + +from pathlib import Path + +from fastapi import APIRouter, Depends, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates + +from chat.services.cross_chat_search import search_all_memories +from chat.state.entities import get_bot +from chat.state.world import get_chat, get_scene +from chat.web.bots import get_conn + +TEMPLATES = Jinja2Templates( + directory=str(Path(__file__).resolve().parent.parent / "templates") +) + +router = APIRouter() + + +@router.get("/search", response_class=HTMLResponse) +async def search(request: Request, q: str = "", conn=Depends(get_conn)): + """Render ``search.html`` with up to 50 cross-chat FTS matches. 
+ + ``q`` is intentionally allowed to be empty — that path renders the + page's "enter a query" placeholder rather than a 400, because the + top-bar form submits to this URL even with an empty input. T93's + service short-circuits whitespace-only queries to ``[]`` so there + is no FTS5 ``MATCH ''`` syntax error to guard against here. + """ + raw_results = search_all_memories(conn, query=q, k=50) if q else [] + + # Hydrate display fields per row. We do this in the route (not the + # service) so the service stays a pure FTS shim that other UIs + # can reuse. + results = [] + for row in raw_results: + bot = get_bot(conn, row["owner_id"]) + chat = get_chat(conn, row["chat_id"]) + scene = get_scene(conn, row["scene_id"]) if row["scene_id"] else None + results.append( + { + "memory_id": row["memory_id"], + "owner_id": row["owner_id"], + "owner_name": bot["name"] if bot else row["owner_id"], + "chat_id": row["chat_id"], + "chat_name": ( + chat.get("narrative_anchor") if chat else None + ), + "scene_id": row["scene_id"], + # Scenes have no ``title`` column today; surface the + # ``started_at`` timestamp as a human-friendly label + # when a scene is set, otherwise leave it blank. + "scene_label": ( + scene.get("started_at") if scene else None + ), + "pov_summary": row["pov_summary"], + "significance": row["significance"], + "ts": row["ts"], + } + ) + + return TEMPLATES.TemplateResponse( + request, + "search.html", + { + "query": q, + "results": results, + "active_nav": "search", + }, + ) diff --git a/tests/test_search_ux.py b/tests/test_search_ux.py new file mode 100644 index 0000000..7254549 --- /dev/null +++ b/tests/test_search_ux.py @@ -0,0 +1,135 @@ +"""T100 (Phase 4): cross-chat search UX (top-bar + results page). 
+ +Verifies the FastAPI ``/search`` route that wraps T93's +``search_all_memories`` service: + +* ``/search?q=...`` returns 200 + an HTML page that lists matches drawn + from MULTIPLE chats (not just the current one) and links each result + back to ``/chats/{chat_id}``. +* ``/search`` with no query renders the page in its empty state with a + "enter a query" placeholder and no result rows (avoids hitting the + FTS index with an invalid empty MATCH). +* Result links navigate to the originating chat so users can pick up + the thread where the memory came from. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +import chat.state.memory # noqa: F401 (registers memory_written handler) + + +@pytest.fixture +def client(tmp_path, monkeypatch): + config_path = tmp_path / "config.toml" + config_path.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(config_path)) + monkeypatch.setenv("CHAT_DB_PATH", str(tmp_path / "test.db")) + with TestClient(app) as c: + yield c + + +def _seed_two_chats_with_memories(db_path: Path) -> None: + """Seed: a ``you_entity``, two bots, two chats, and one ``rabbit`` + memory per chat. 
Two-chat seeding lets the cross-chat assertion + actually distinguish "both chats appear" from "only the current + one does".""" + with open_db(db_path) as conn: + append_event( + conn, + kind="you_authored", + payload={"name": "Me", "pronouns": "", "persona": ""}, + ) + for bot_id, chat_id in (("bot_a", "chat_a"), ("bot_b", "chat_b")): + append_event( + conn, + kind="bot_authored", + payload={ + "id": bot_id, + "name": bot_id.upper(), + "persona": "thoughtful", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "friend", + "kickoff_prose": "kickoff", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": chat_id, + "host_bot_id": bot_id, + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="memory_written", + payload={ + "owner_id": bot_id, + "chat_id": chat_id, + "pov_summary": f"the rabbit darted across {chat_id}", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "direct", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + }, + ) + project(conn) + + +def test_search_returns_results_from_multiple_chats(client, tmp_path): + """A single ``/search?q=rabbit`` must surface matches from BOTH + chats — the whole point of the cross-chat search box is that it + isn't owner-scoped.""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + body = resp.text + # Both chats' memory snippets must appear in the rendered page. + assert "chat_a" in body + assert "chat_b" in body + assert "rabbit" in body.lower() + + +def test_empty_query_renders_placeholder_not_results(client, tmp_path): + """``/search`` with no query renders the page in its empty state. + + The placeholder copy is a contract with the user — they should see + "enter a query" rather than an empty result list that looks like a + no-match. 
Also: the FTS short-circuit means there are no result + rows to leak into the body.""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search") + assert resp.status_code == 200 + body = resp.text.lower() + assert "enter a query" in body + # Seeded "rabbit" memories must NOT appear: empty query => no results. + assert "the rabbit darted" not in resp.text + + +def test_result_links_navigate_to_chat(client, tmp_path): + """Each result links back to its originating chat so the user can + reopen the thread where the memory was first witnessed.""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + # The link target is chat-level (memories don't carry an event_id + # column today, so we don't deep-link to a specific turn). + assert 'href="/chats/chat_a"' in resp.text -- 2.52.0 From b6119879e593bc5bdd8873e2dddb24aa571037bc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 03:56:45 -0400 Subject: [PATCH 25/26] docs: phase 4 status, behavioral defaults, deferred items (T102) --- CLAUDE.md | 85 +++++++++++++++++++ .../2026-04-26-v1-requirements-design.md | 2 + 2 files changed, 87 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 8d80cd5..ab0a5dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -287,3 +287,88 @@ New follow-ups discovered during Phase 3.5 reviews and execution. None are block - **Scene-close-on-cancel UX revisit** (Phase 2.5 carry-over): T74.3 pinned the existing behavior; revisit if real play-testing surfaces a regression. - **Cross-feature canned-queue brittleness**: meanwhile-scene close test required a canned response for T65's digest call after T64+T65 merge. Future close-path additions will keep extending the queue. Consider a structured fixture builder rather than positional canned arrays. NOT addressed in Phase 3.5. 
- **Lifecycle-transition rollback in regenerate**: T83.4 added a warning log; actual rollback (with proper schema linkage from lifecycle event back to producing turn) is Phase 4 work. + +## Phase 4 status + +Phase 4 polish shipped end-to-end across 15 tasks (T88–T102). Vector retrieval is functional via pure-Python cosine over a JSON-blob embeddings table (sqlite-vec deferred — host Python lacks loadable extensions). Branching is data-model + drawer UI. Surgical delete with cascade preview, hide-from-view soft delete, significance review panel, snapshot UX, and cross-chat search all surface from the drawer or top-bar. Test count grew from 343 (Phase 3.5) to ~413 (+70 new tests). + +- **Wave 1 — schema + Phase 3.6 carry-overs (parallel)**: + - **T88** `embeddings` table + projector handlers (pure-Python cosine, JSON-blob storage; sqlite-vec deferred). + - **T89** `branches` table + handlers (main bootstrapped; `is_active` flag; partial unique index). + - **T90** Phase 3.6 carry-overs trio — `read_recent_dialogue` chat-id SQL pushdown, lifecycle warning wording tightening, legacy `record_turn_memory` removed. +- **Wave 2 — services (parallel)**: + - **T91** embedding generation service (Phase 4 ships a deterministic SHA-256-derived pseudo-embedding; real model swap is Phase 4.5+). + - **T92** vector search service via pure-Python cosine. + - **T93** cross-chat search service (FTS5 across all owners, no witness filter — admin-style). +- **Wave 3 — services (parallel)**: + - **T94** branching service (`branch_from_event`, `switch_active_branch`, `list_branches_with_metadata`). + - **T95** delete-impact computation service (cascade preview, no DB mutation). +- **Wave 4 — combined retrieval (single)**: + - **T96** combined FTS + vector retrieval ranking via reciprocal-rank fusion (RRF, `RRF_CONST=60`); existing significance/recency boost applied as final pass. 
+- **Wave 5 — memory write hook + backfill (single)**: + - **T97** `EmbeddingWorker` drains queue and emits `embedding_indexed` events; `memory_write` enqueues per `memory_written`; `backfill_embeddings` script for existing memories; ALL 4 production call sites wired (turns, regenerate, meanwhile, drawer). +- **Wave 6 — drawer Phase 4 bundle (single, 5 sub-features)**: + - **T98.1** branching UI (Branches panel + 3 routes). + - **T98.2** significance review panel (distribution bar chart + per-memory edit). + - **T98.3** hide-from-view toggle + `turn_hidden` `manual_edit` branch. + - **T98.4** surgical delete with cascade preview (reuses existing rewind path; pre-rewind snapshot preserved). + - **T98.5** remaining v1 edits — `narrative_anchor` + weather drawer affordances + 2 new `manual_edit` branches. +- **Wave 7 — UX surfaces (parallel)**: + - **T99** snapshot UX (manual trigger, list, restore with hard-confirm, preview). + - **T100** cross-chat search UX (top-bar form + results page). +- **Wave 8 — polish (parallel)**: + - **T101** cross-feature integration tests (5 multi-feature scenarios). + - **T102** documentation (this section). + +### Phase 4.5 / 5 backlog + +New follow-ups discovered during Phase 4 reviews and execution. None are blocking; pick up at any time. + +#### From T88 review + +- **`embeddings` FK lacks `ON DELETE CASCADE`**: deindex events are the only deletion path; if memories ever get deleted directly (raw SQL), embedding rows orphan. Defensible since projector model uses explicit deindex events, but worth a comment or `ON DELETE CASCADE` addition. + +#### From T89 review + +- **`list_branches(chat_id=...)` filter leaks global branches** (`chat_id IS NULL`) into every chat scope. Intentional? Document. +- **Branch-switch to a nonexistent branch silently leaves zero active branches** — log a warning when this would happen. + +#### From T91 review + +- **Real embedding model swap**: Phase 4 ships pseudo-embedding (deterministic SHA-256 hash). 
Phase 4.5+ should swap to a real model (Featherless `bge-small-en-v1.5` if available; or local `sentence-transformers/all-MiniLM-L6-v2`). The 384-dim is hardcoded in `0012_embeddings.sql`; if dim changes, migrate first. +- **`timeout_s` unused on pseudo path** — fine, but log when non-default model falls through to fallback so misconfigured callers don't silently degrade. + +#### From T96 review + +- **Duplicate `MAX(id)` lookup** between `_composite_rerank` and the fused-path tail — DRY follow-up. +- **`fts_rank=None` for vector-only rows** — document downstream contract. + +#### From T98 review + +- **`event_id <= 0` guard in `delete_turn`** — currently silently rewinds everything if `event_id` is 0. Add `if event_id <= 0: 400`. +- **`html.escape()` on `compute_delete_impact` output rendered into the modal** — defense in depth (currently model-controlled strings, but if event payload fields ever appear in descriptions, autoescape needed). +- **Extract delete-impact modal HTML to a Jinja partial** — testability + autoescape inheritance. + +#### From T99 review + +- **Hoist `datetime`/`timezone` imports to module level** in `chat/web/snapshots.py`. +- **`kind` defaulting in restore/preview** — reject missing `kind` rather than silent 404. +- **`created_at` from file mtime** vs filename-encoded timestamp — small drift if files copied; document. + +#### From T100 review + +- **Hardcoded `k=50`** — extract to module constant. +- **N+1 lookups (`get_bot`/`get_chat`/`get_scene` per row)** — fine at `k=50`, revisit if `k` grows. +- **FTS highlighting via `snippet()`** — Phase 4 skipped this; UX nice-to-have. +- **Result links chat-level only** — `memories` table has no `event_id` column; deep-linking to specific turn requires schema addition. + +#### Deferred items + +- **sqlite-vec swap** when host Python supports `enable_load_extension`. +- **Real embedding model** with proper semantic similarity. 
+- **Branching read-side filter**: T89 ships data-model + UI but event readers don't yet consult `is_active`. Each branch is a metadata-only labeled range. Consult-on-read is Phase 4.5+ work. +- **Bulk significance re-rate** in drawer (T98.2 deferred — only per-memory edit shipped). +- **Vector index optimization** (HNSW) — only relevant if memory counts grow past pure-Python feasibility. +- **`scene-close-on-cancel` UX revisit** (Phase 2.5 carry-over). +- **Cross-feature canned-queue brittleness fixture builder** (Phase 3 carry-over). +- **Full lifecycle-rollback in regenerate** — Phase 3.5 T83.4 shipped a warning log; proper rollback needs schema-level back-references (`triggered_by_assistant_turn_id` payload field). diff --git a/docs/plans/2026-04-26-v1-requirements-design.md b/docs/plans/2026-04-26-v1-requirements-design.md index f84b2cb..5db1623 100644 --- a/docs/plans/2026-04-26-v1-requirements-design.md +++ b/docs/plans/2026-04-26-v1-requirements-design.md @@ -520,6 +520,8 @@ Written per witness when a scene closes. Different details, different interpreta ### Phase 4 — polish +**Status: shipped 2026-04-27** (T88–T102, 15 tasks across 8 waves; +70 tests). See "Phase 4 status" in CLAUDE.md for the per-task breakdown. Vector retrieval shipped via pure-Python cosine over a JSON-blob embeddings table (sqlite-vec deferred — host Python lacks loadable extensions); branching is data-model + drawer UI; significance review, hide-from-view soft delete, surgical delete with cascade preview, snapshot UX, and cross-chat search all surface from the drawer or top-bar. + - Vector retrieval (sqlite-vss or sqlite-vec). - Branching UI. - Drawer-edit on every field. 
-- 2.52.0 From 228f9abb191136215d651cf44cdc95c86a928762 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 04:08:25 -0400 Subject: [PATCH 26/26] test: phase 4 cross-feature integration coverage (T101) --- tests/test_phase4_integration.py | 743 ++++++++++++++++++++++++++++++- 1 file changed, 727 insertions(+), 16 deletions(-) diff --git a/tests/test_phase4_integration.py b/tests/test_phase4_integration.py index ee30f07..489c008 100644 --- a/tests/test_phase4_integration.py +++ b/tests/test_phase4_integration.py @@ -1,20 +1,38 @@ -"""Phase 4 cross-feature integration tests (T97 follow-up). +"""Phase 4 cross-feature integration tests (T97 follow-up + T101). -Wave 8 / T101 will populate this file with the full Phase 4 retrieval + -embedding integration suite. For now this houses a single test pinning -the T97.5 wiring: the production turn route plumbs ``app=request.app`` -all the way through ``record_turn_memory_for_present`` so the embedding -worker actually receives jobs in production. Without this fix-up the -plumbing added in T97 was dormant — every per-witness write took the -no-app branch and silently dropped the embed enqueue. +Cross-feature flows for the Phase 4 retrieval + branching + drawer +features. Each test drives multiple Phase 4 surfaces end-to-end and +asserts both event_log and projected-state outcomes. -The test monkeypatches ``app.state.embedding_worker.enqueue`` to record -jobs (rather than draining the worker mid-test) so the assertion is -deterministic and free of asyncio-timing flakiness inside FastAPI's -TestClient. The bug we're guarding against is "did the call site pass -``app`` at all" — the worker's drain path is exercised in -:mod:`tests.test_embedding_worker`, so duplicating that here would add -no coverage. +Test inventory: + +* ``test_post_turn_embeddings_indexed_via_worker_hook`` (T97.5) — + pins the production turn route's ``app=request.app`` plumbing so + the embedding worker actually receives jobs. 
+ +T101 additions (the "Phase 4 cross-feature integration" suite): + +1. ``test_vector_retrieval_feedback_loop`` — write a memory, drain + the embedding worker, assert the vector path retrieves it. +2. ``test_branch_diverge_main_intact`` — create a branch from a + mid-log turn, switch, append more events, switch back and assert + the original log past the branch point is still present (Phase 4 + branching is metadata-only — no read-side filter yet). +3. ``test_surgical_delete_truncates_log_and_writes_snapshot`` — + compute impact, confirm via the drawer route, assert the log was + truncated and a pre-rewind snapshot landed on disk. +4. ``test_hide_then_unhide_round_trip_through_read_recent_dialogue`` + — flip ``hidden`` via the drawer route both directions and assert + ``read_recent_dialogue`` honours the flag in real time. +5. ``test_cross_chat_search_surfaces_memories_in_three_chats`` — + write memories in 3 chats, hit ``/search?q=...`` and assert all + three appear. + +The T97.5 test monkeypatches ``app.state.embedding_worker.enqueue`` to +record jobs (rather than draining the worker) because the bug it pins +is "did the call site pass ``app`` at all". T101 test 1 takes the +opposite tack: it drives the worker for real to verify the entire +write -> index -> retrieve loop. """ from __future__ import annotations @@ -27,7 +45,7 @@ from fastapi.testclient import TestClient from chat.app import app from chat.db.connection import open_db -from chat.eventlog.log import append_event +from chat.eventlog.log import append_and_apply, append_event from chat.eventlog.projector import project from chat.llm.mock import MockLLMClient @@ -178,3 +196,696 @@ def test_post_turn_embeddings_indexed_via_worker_hook( ).fetchall() ] assert job.memory_id in memory_ids + + +# --------------------------------------------------------------------------- +# T101 — Phase 4 cross-feature integration suite. 
+# --------------------------------------------------------------------------- +# +# Helpers + the five required scenarios. Each test drives multiple Phase 4 +# features so a regression in any one of them fails an integration check. + + +def _seed_minimal_chat(db_path: Path, chat_id: str = "chat_bot_a") -> None: + """Seed bot_a, you, a chat, edges, and activities — same shape as + ``tests/test_phase3_integration.py::_seed_single_bot_chat`` but + parameterised on chat_id so the cross-chat search test can stamp + several chats in the same database without renaming bots. + + Uses ``append_and_apply`` rather than ``append_event`` + a final + ``project`` so successive calls (e.g. one per chat in the + cross-chat-search test) don't try to re-project the cumulative + log and trip the ``chats.id`` UNIQUE constraint on the prior + chat's row. + """ + with open_db(db_path) as conn: + existing_bot = conn.execute( + "SELECT 1 FROM bots WHERE id = 'bot_a'" + ).fetchone() + if existing_bot is None: + append_and_apply( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "thoughtful", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "...", + }, + ) + append_and_apply( + conn, + kind="you_authored", + payload={ + "name": "Me", + "pronouns": "they/them", + "persona": "", + }, + ) + append_and_apply( + conn, + kind="chat_created", + payload={ + "id": chat_id, + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_and_apply( + conn, + kind="edge_update", + payload={ + "source_id": "bot_a", + "target_id": "you", + "chat_id": chat_id, + "knowledge_facts": [], + }, + ) + # Activities are unique per (entity_id) — only seed them on the + # first call (when the bot row is also fresh). 
+ if existing_bot is None: + for entity_id, verb in [ + ("you", "talking"), + ("bot_a", "listening"), + ]: + append_and_apply( + conn, + kind="activity_change", + payload={ + "entity_id": entity_id, + "posture": "sitting", + "action": { + "verb": verb, + "interruptible": True, + "required_attention": "low", + "expected_duration": "ongoing", + }, + "attention": "", + "holding": [], + "status": {}, + }, + ) + + +# --------------------------------------------------------------------------- +# 1. Vector retrieval feedback loop. +# --------------------------------------------------------------------------- + + +async def test_vector_retrieval_feedback_loop(tmp_path): + """End-to-end: write a memory through + :func:`record_turn_memory_for_present` so an :class:`EmbeddingJob` + lands on a worker, drain the worker, then call + :func:`vector_search` with the SAME pseudo-embedding function and + assert the just-written memory is the top hit. + + Why this test does NOT use the TestClient fixture: the live + ``app.state.embedding_worker`` is created inside the FastAPI + lifespan's event loop. ``await``-ing on it from pytest-asyncio's + loop trips ``"got Future attached to a different loop"``. We + instead spin up a fresh :class:`EmbeddingWorker` in the test + loop, exactly mirroring ``tests/test_embedding_worker.py``'s + pattern. The T97.5 test above pins the wiring between the live + HTTP route and the live app worker; this test pins the + write -> index -> retrieve loop with no transport in scope. + + Cross-feature gaps this test catches: + * Memory write enqueues to the worker but the worker never + drains (e.g. ``_run`` deadlock or sentinel mishandled). + * Worker uses a different embedding function than + ``vector_search`` at query time, producing different vectors + and breaking cosine retrieval. + * ``embeddings`` projector handler is not registered (e.g. + import ordering bug) so the event fires but the table stays + empty. 
+ """ + from types import SimpleNamespace + + from chat.db.migrate import apply_migrations + from chat.services.embedding_worker import EmbeddingWorker + from chat.services.embeddings import generate_embedding + from chat.services.memory_write import record_turn_memory_for_present + from chat.services.vector_search import vector_search + + # Trigger projector handler registration. ``record_turn_memory_for_present`` + # imports memory_write which imports the worker module, but the + # projector handlers live in ``chat.state.*`` modules and are + # registered as a side effect of import. + import chat.state.embeddings # noqa: F401 + import chat.state.entities # noqa: F401 + import chat.state.memory # noqa: F401 + import chat.state.world # noqa: F401 + + db = tmp_path / "test.db" + apply_migrations(db) + _seed_minimal_chat(db) + + # Spin up our own worker in the test event loop. ``client=None`` + # is fine for the pseudo-embedding path — the local hash function + # does not require an LLM client. + worker = EmbeddingWorker( + conn_factory=lambda: open_db(db), + client=None, + ) + await worker.start() + + # Stub ``app`` — only ``app.state.embedding_worker`` is read by + # ``_write_one_memory``. SimpleNamespace gives us a stand-in that + # exposes ``state.embedding_worker`` without the full FastAPI app. + fake_app = SimpleNamespace(state=SimpleNamespace(embedding_worker=worker)) + + distinctive_text = "Maya watched the gondola lights drift across the lagoon." + with open_db(db) as conn: + record_turn_memory_for_present( + conn, + chat_id="chat_bot_a", + host_bot_id="bot_a", + guest_bot_id=None, + narrative_text=distinctive_text, + app=fake_app, + ) + + # Drain the worker via the sentinel. After this returns the + # ``embedding_indexed`` event has been projected. + await worker.stop() + + # Generate a query embedding using the same function the worker + # used. 
The pseudo-embedding is deterministic so a query equal to + # the indexed text produces the identical vector and a cosine + # similarity of 1.0. + query_result = await generate_embedding(client=None, text=distinctive_text) + + with open_db(db) as conn: + emb_count = conn.execute( + "SELECT COUNT(*) FROM embeddings" + ).fetchone()[0] + assert emb_count == 1, ( + "embedding worker did not project an embedding_indexed event" + ) + + hits = vector_search( + conn, + owner_id="bot_a", + witness_role="host", # bot_a is host, witness_host=1 by default + query_vector=query_result.vector, + k=4, + ) + assert len(hits) == 1 + top = hits[0] + assert top["pov_summary"] == distinctive_text + # Self-match: cosine of identical vectors is 1.0. + assert top["score"] == pytest.approx(1.0, abs=1e-9) + + +# --------------------------------------------------------------------------- +# 2. Branch + diverge: main's post-branch tail stays intact (Phase 4 +# branches are metadata-only). +# --------------------------------------------------------------------------- + + +def test_branch_diverge_main_intact(app_state_setup, tmp_path): + """Append turns 1-12 on main, branch from turn 10's event_id, switch + to the new branch, append 3 more "play" turns, switch back to main, + assert the original turn 11+ events are untouched. + + Phase 4's branches table is metadata-only — the read-side filter + isn't wired yet, so all events live in one log regardless of which + branch is "active". This test pins that contract: switching does + not mutate or hide existing events on either branch. + + Canned LLM queue: none. ``user_turn`` / ``assistant_turn`` are + transcript-only kinds with no projector handler that needs an + LLM call, and ``branch_created`` / ``branch_switched`` are pure + state events. 
We use ``append_and_apply`` directly rather than + driving the HTTP turn route, which would require a 6-slot canned + queue per turn (parse + narrative + 2 state-updates + scene-close + + memory) for 15 turns total = 90 slots of plumbing irrelevant to + the branch contract. + """ + from chat.services.branching import branch_from_event, switch_active_branch + from chat.state.branches import active_branch + + db = tmp_path / "test.db" + _seed_minimal_chat(db) + + # Append 12 user_turn / assistant_turn pairs on main. We collect + # the assistant_turn id at index 10 (1-based: "turn 10") so the + # branch fork point is unambiguous. + main_turn_ids: list[int] = [] + with open_db(db) as conn: + for i in range(1, 13): + user_id = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": f"main turn {i}", + "segments": [], + }, + ) + asst_id = append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": f"main reply {i}", + "truncated": False, + "user_turn_id": user_id, + }, + ) + main_turn_ids.append(asst_id) + turn_10_id = main_turn_ids[9] + + # Snapshot the post-turn-10 main tail (turns 11, 12 + their + # user_turn predecessors) so we can byte-compare after the + # round-trip. + main_tail_before = conn.execute( + "SELECT id, kind, payload_json, hidden, superseded_by " + "FROM event_log WHERE id > ? ORDER BY id", + (turn_10_id,), + ).fetchall() + assert len(main_tail_before) == 4 # 2 user + 2 assistant past turn 10 + + # Branch from turn 10. Phase 4's helper validates the origin + # event id exists and emits ``branch_created``. + branch_from_event( + conn, + name="experiment", + origin_event_id=turn_10_id, + chat_id="chat_bot_a", + ) + switch_active_branch(conn, name="experiment") + active = active_branch(conn) + assert active is not None and active["name"] == "experiment" + + # Play 3 turns on the experiment branch. 
+ for i in range(1, 4): + user_id = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": f"experiment turn {i}", + "segments": [], + }, + ) + append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": f"experiment reply {i}", + "truncated": False, + "user_turn_id": user_id, + }, + ) + + # Switch back to main. + switch_active_branch(conn, name="main") + active2 = active_branch(conn) + assert active2 is not None and active2["name"] == "main" + + # Main's original tail past turn 10 is byte-identical: the + # branching events (branch_created, branch_switched x2) and the + # 3 experiment turns sit AFTER the original tail in event_log + # order, never overwriting it. + main_tail_after = conn.execute( + "SELECT id, kind, payload_json, hidden, superseded_by " + "FROM event_log " + "WHERE id > ? AND id <= ? ORDER BY id", + (turn_10_id, main_turn_ids[-1]), + ).fetchall() + assert main_tail_after == main_tail_before + + # The 6 experiment events (3 user + 3 assistant) all live in + # the same log past the original main tail. Verify their + # prose payloads to disambiguate from main's content. + diverged = conn.execute( + "SELECT kind, json_extract(payload_json, '$.prose'), " + " json_extract(payload_json, '$.text') " + "FROM event_log WHERE id > ? " + " AND kind IN ('user_turn', 'assistant_turn') ORDER BY id", + (main_turn_ids[-1],), + ).fetchall() + assert len(diverged) == 6 + prose_or_text = [(row[1] or row[2]) for row in diverged] + # Sequence: user1, asst1, user2, asst2, user3, asst3. + assert "experiment turn 1" in prose_or_text + assert "experiment reply 1" in prose_or_text + assert "experiment turn 3" in prose_or_text + assert "experiment reply 3" in prose_or_text + + +# --------------------------------------------------------------------------- +# 3. Surgical delete: impact preview -> confirm -> log truncated + +# pre-rewind snapshot saved. 
+# --------------------------------------------------------------------------- + + +def test_surgical_delete_truncates_log_and_writes_snapshot( + app_state_setup, tmp_path +): + """Compute the delete-impact for a turn (read-only preview), then + confirm via the POST drawer route. Assert: + + * The preview returns 200 + cascade markup. + * The event_log is physically truncated past ``target_id - 1``. + * A snapshot file lands under ``<data_dir>/snapshots/rewind/``. + * The pre-rewind snapshot's ``last_event_id`` matches the high + water mark BEFORE the truncate (so recovery can replay back to + pre-delete state). + + Snapshot location: T97.5's ``data_dir`` derives from the db's + parent directory when ``CHAT_DATA_DIR`` is unset. The fixture + sets ``CHAT_DB_PATH = tmp_path / "test.db"`` so the snapshot + parent is ``tmp_path / "snapshots" / "rewind"``. + + No canned LLM queue — the preview is pure SQL and the rewind path + is also pure SQL (delete + reproject). The drawer routes don't + invoke the LLM. + """ + import json as _json + + db = tmp_path / "test.db" + _seed_minimal_chat(db) + + # Append a small fixed turn sequence we can predict the cascade for.
+ with open_db(db) as conn: + first_user = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "first message", + "segments": [], + }, + ) + append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "first reply", + "truncated": False, + "user_turn_id": first_user, + }, + ) + target_user = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "this turn will be deleted", + "segments": [], + }, + ) + target_asst = append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "and so will this reply", + "truncated": False, + "user_turn_id": target_user, + }, + ) + # One trailing event past the target so we can verify the + # cascade catches >1 event. + trailing = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "trailing context", + "segments": [], + }, + ) + max_id_before = conn.execute( + "SELECT MAX(id) FROM event_log" + ).fetchone()[0] + + # ---- Preview: GET delete-preview returns 200 + the cascade list. ---- + preview = app_state_setup.get( + f"/chats/chat_bot_a/drawer/turn/delete-preview/{target_user}" + ) + assert preview.status_code == 200 + body = preview.text + assert "delete-impact-modal" in body + assert f"Delete event {target_user}?" in body + assert "user_turn" in body + assert "assistant_turn" in body + # Confirm form points at the delete route. + assert f"/drawer/turn/delete/{target_user}" in body + + # ---- Confirm: POST delete drops user, assistant, AND trailing. ---- + confirm = app_state_setup.post( + f"/chats/chat_bot_a/drawer/turn/delete/{target_user}" + ) + assert confirm.status_code == 200 + + # ---- Event log truncated past target_user - 1. 
---- + with open_db(db) as conn: + max_id_after = conn.execute( + "SELECT MAX(id) FROM event_log" + ).fetchone()[0] + # delete_turn passes ``after_event_id = target_user - 1`` so + # everything from target_user forward is gone. + assert max_id_after == target_user - 1 + for ev_id in (target_user, target_asst, trailing): + row = conn.execute( + "SELECT 1 FROM event_log WHERE id = ?", (ev_id,) + ).fetchone() + assert row is None, f"event {ev_id} should have been deleted" + + # ---- Pre-rewind snapshot landed on disk. ---- + snapshot_dir = tmp_path / "snapshots" / "rewind" + assert snapshot_dir.exists(), ( + f"snapshot dir not created: {snapshot_dir}" + ) + snapshots = sorted(snapshot_dir.glob("*.json")) + assert len(snapshots) >= 1, ( + f"no rewind snapshot written under {snapshot_dir}" + ) + # Most-recent snapshot's last_event_id == pre-truncate high water + # mark, so a "restore" path could fully reverse the delete. + latest_snapshot = snapshots[-1] + snap_data = _json.loads(latest_snapshot.read_text()) + assert snap_data["last_event_id"] == max_id_before + + +# --------------------------------------------------------------------------- +# 4. Hide + retrieval: drawer hide drops a turn from read_recent_dialogue, +# unhide restores it. +# --------------------------------------------------------------------------- + + +def test_hide_then_unhide_round_trip_through_read_recent_dialogue( + app_state_setup, tmp_path +): + """Drive a hide -> read -> unhide -> read cycle through the drawer + HTTP route and assert ``read_recent_dialogue`` flips visibility + each step. T98.3 wires the route; T55 / turn_common owns the + ``hidden = 0`` filter. + + Cross-feature: the drawer HTTP handler emits a ``manual_edit`` + event with branch ``turn_hidden``, the manual_edit projector + flips ``event_log.hidden``, and the prompt-window reader filters + on that column. Three layers — any one breaking would fail this + test. + + No canned LLM queue — hide/unhide are pure SQL routes. 
+ """ + from chat.services.turn_common import read_recent_dialogue + + db = tmp_path / "test.db" + _seed_minimal_chat(db) + + with open_db(db) as conn: + user_a = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "first user line", + "segments": [], + }, + ) + asst_a = append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "first reply", + "truncated": False, + "user_turn_id": user_a, + }, + ) + user_b = append_and_apply( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_bot_a", + "prose": "second user line", + "segments": [], + }, + ) + asst_b = append_and_apply( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_bot_a", + "speaker_id": "bot_a", + "text": "second reply", + "truncated": False, + "user_turn_id": user_b, + }, + ) + + # Baseline: all 4 turns visible. + baseline = read_recent_dialogue(conn, "chat_bot_a", limit=10) + baseline_ids = {t["event_id"] for t in baseline} + assert {user_a, asst_a, user_b, asst_b} <= baseline_ids + + # ---- Hide user_b via the drawer route. ---- + hide_resp = app_state_setup.post( + f"/chats/chat_bot_a/drawer/turn/hide/{user_b}", + data={"hidden": "1"}, + ) + assert hide_resp.status_code == 200 + + with open_db(db) as conn: + # event_log.hidden flipped. + row = conn.execute( + "SELECT hidden FROM event_log WHERE id = ?", (user_b,) + ).fetchone() + assert int(row[0]) == 1 + + # read_recent_dialogue drops user_b but keeps the others. + after_hide = read_recent_dialogue(conn, "chat_bot_a", limit=10) + after_hide_ids = {t["event_id"] for t in after_hide} + assert user_b not in after_hide_ids + # The other 3 turns still surface. + assert {user_a, asst_a, asst_b} <= after_hide_ids + + # ---- Unhide via the SAME route with hidden=0. 
---- + unhide_resp = app_state_setup.post( + f"/chats/chat_bot_a/drawer/turn/hide/{user_b}", + data={"hidden": "0"}, + ) + assert unhide_resp.status_code == 200 + + with open_db(db) as conn: + row = conn.execute( + "SELECT hidden FROM event_log WHERE id = ?", (user_b,) + ).fetchone() + assert int(row[0]) == 0 + + # read_recent_dialogue restores user_b. + after_unhide = read_recent_dialogue(conn, "chat_bot_a", limit=10) + after_unhide_ids = {t["event_id"] for t in after_unhide} + assert {user_a, asst_a, user_b, asst_b} <= after_unhide_ids + + # Two manual_edit events landed (one per toggle), each with the + # turn_hidden branch tag. + edits = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'manual_edit' " + " AND json_extract(payload_json, '$.target_kind') = 'turn_hidden' " + "ORDER BY id" + ).fetchall() + assert len(edits) == 2 + + +# --------------------------------------------------------------------------- +# 5. Cross-chat search: memories across 3 chats all surface from /search. +# --------------------------------------------------------------------------- + + +def test_cross_chat_search_surfaces_memories_in_three_chats( + app_state_setup, tmp_path +): + """Seed 3 chats each owned by bot_a (so the bot row exists for the + search route's display-name hydration), write a distinctive + memory in each, then GET ``/search?q=<distinctive>`` and assert + every chat appears as a result row. + + Cross-feature: T93's :func:`search_all_memories` (no per-owner + filter) + T100's HTML route (display-name hydration via + ``get_bot``/``get_chat``). The route's empty-query short-circuit + is incidentally exercised by the request setup but isn't the + focus. + + No canned LLM queue — memory_written events are projected directly + via ``append_and_apply`` and the search route is pure SQL + + template rendering. + """ + db = tmp_path / "test.db" + # Three chats, all hosted by bot_a so bot_a is the owner of all + # three memories.
_seed_minimal_chat skips the bot/you bootstrap + # after the first call so the cumulative seed is consistent. + chat_ids = ["chat_bot_a", "chat_bot_a_2", "chat_bot_a_3"] + for chat_id in chat_ids: + _seed_minimal_chat(db, chat_id=chat_id) + + # Distinctive token — "wisteria" appears nowhere else in the seed. + distinctive = "wisteria" + with open_db(db) as conn: + for idx, chat_id in enumerate(chat_ids): + append_and_apply( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": chat_id, + "pov_summary": ( + f"the {distinctive} bloomed by the gate (chat {idx})" + ), + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "direct", + "reliability": 1.0, + "significance": 1, + "pinned": 0, + "auto_pinned": 0, + }, + ) + + # ---- GET /search?q=wisteria -> all 3 chats appear as result rows. ---- + response = app_state_setup.get(f"/search?q={distinctive}") + assert response.status_code == 200 + body = response.text + + # Each chat_id appears in a result link href, e.g. + # ``href="/chats/chat_bot_a"``. The template renders one + # ``<a href="/chats/{chat_id}">`` per + # row, so a substring match per chat is sufficient. + for chat_id in chat_ids: + assert f'href="/chats/{chat_id}"' in body, ( + f"chat {chat_id} missing from /search results: {body!r}" + ) + # The owner display name (BotA) renders for each row — verify >= 3 + # occurrences so we know all 3 result rows hydrated, not just 1. + assert body.count("BotA") >= 3 + + # ---- Sanity: distractor query yields no results. ---- + distractor_response = app_state_setup.get( + "/search?q=nonexistentterm12345" + ) + assert distractor_response.status_code == 200 + distractor_body = distractor_response.text + # The "no matches" empty-state copy fires. + assert "No matches" in distractor_body + for chat_id in chat_ids: + assert f'href="/chats/{chat_id}"' not in distractor_body -- 2.52.0