From 994728b5ed76571f2f7c4c73197cc2e0f1b3ff74 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:05:29 -0400 Subject: [PATCH 01/17] refactor: open_db with check_same_thread parameter (T68) --- chat/db/connection.py | 4 +-- chat/web/bots.py | 11 ++----- tests/test_open_db_threading.py | 57 +++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 11 deletions(-) create mode 100644 tests/test_open_db_threading.py diff --git a/chat/db/connection.py b/chat/db/connection.py index ad21a01..f293aca 100644 --- a/chat/db/connection.py +++ b/chat/db/connection.py @@ -5,9 +5,9 @@ from pathlib import Path @contextmanager -def open_db(path: Path): +def open_db(path: Path, *, check_same_thread: bool = True): path.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(path) + conn = sqlite3.connect(path, check_same_thread=check_same_thread) conn.execute("PRAGMA journal_mode=WAL") conn.execute("PRAGMA foreign_keys=ON") try: diff --git a/chat/web/bots.py b/chat/web/bots.py index d9666f3..027130b 100644 --- a/chat/web/bots.py +++ b/chat/web/bots.py @@ -1,10 +1,10 @@ from __future__ import annotations -import sqlite3 from pathlib import Path from fastapi import APIRouter, Depends, Form, HTTPException, Request from fastapi.responses import RedirectResponse, HTMLResponse from fastapi.templating import Jinja2Templates +from chat.db.connection import open_db from chat.eventlog.log import append_event from chat.eventlog.projector import project from chat.state.entities import list_bots @@ -19,15 +19,8 @@ REQUIRED_FIELDS = ("id", "name", "persona", "initial_relationship_to_you", "kick def get_conn(request: Request): settings = request.app.state.settings db_path: Path = settings.db_path - db_path.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(db_path, check_same_thread=False) - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA foreign_keys=ON") - try: + with open_db(db_path, check_same_thread=False) as conn: yield conn - conn.commit() - finally: - conn.close() def _split_voice_samples(text: str) -> list[str]: diff --git a/tests/test_open_db_threading.py b/tests/test_open_db_threading.py new file mode 100644 index 0000000..c7756f9 --- /dev/null +++ b/tests/test_open_db_threading.py @@ -0,0 +1,57 @@ +from __future__ import annotations +import sqlite3 +import threading + +from chat.db.connection import open_db + + +def test_open_db_default_uses_check_same_thread_true(tmp_path): + """Default open_db must reject cross-thread use (safe default).""" + db = tmp_path / "t.db" + captured: list[BaseException | None] = [] + + with open_db(db) as conn: + conn.execute("CREATE TABLE t (x INTEGER)") + + def worker(): + try: + conn.execute("SELECT 1").fetchall() + captured.append(None) + except BaseException as e: # noqa: BLE001 + captured.append(e) + + t = threading.Thread(target=worker) + t.start() + t.join() + + assert len(captured) == 1 + err = captured[0] + assert isinstance(err, sqlite3.ProgrammingError), ( + f"expected sqlite3.ProgrammingError on cross-thread use, got {err!r}" + ) + + +def test_open_db_can_disable_check_same_thread(tmp_path): + """open_db(check_same_thread=False) must allow cross-thread use.""" + db = tmp_path / "t.db" + captured: list[BaseException | None] = [] + rows: list[object] = [] + + with open_db(db, check_same_thread=False) as conn: + conn.execute("CREATE TABLE t (x INTEGER)") + conn.execute("INSERT INTO t (x) VALUES (42)") + + def worker(): + try: + result = conn.execute("SELECT x FROM t").fetchall() + rows.extend(result) + captured.append(None) + except BaseException as e: # noqa: BLE001 + captured.append(e) + + t = threading.Thread(target=worker) + t.start() + t.join() + + assert captured == [None], f"worker thread raised: {captured}" + assert rows == [(42,)] -- 2.52.0 From c1e419e01201e2643cacb2f07797c882108e5528 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:06:21 -0400 Subject: [PATCH 02/17] fix: bot_reset purges orphaned 'you' activity rows (T69) --- chat/state/entities.py | 13 ++- tests/test_reset.py | 183 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 2 deletions(-) diff --git a/chat/state/entities.py b/chat/state/entities.py index 7a00edc..47e8093 100644 --- a/chat/state/entities.py +++ b/chat/state/entities.py @@ -48,6 +48,17 @@ def _apply_bot_reset(conn: Connection, e: Event) -> None: "SELECT id FROM chats WHERE host_bot_id = ?", (bot_id,) ).fetchall() ] + # T69: purge orphaned "you" activity rows pointing at containers in this + # bot's chats BEFORE the containers/chats themselves are deleted, otherwise + # the subqueries find nothing and the FK constraint on activity.container_id + # blocks the container delete. + conn.execute( + "DELETE FROM activity WHERE entity_id = 'you' " + "AND container_id IN (SELECT id FROM containers WHERE chat_id IN (" + " SELECT id FROM chats WHERE host_bot_id = ?" + "))", + (bot_id,), + ) for chat_id in chat_ids: conn.execute("DELETE FROM scenes WHERE chat_id = ?", (chat_id,)) conn.execute("DELETE FROM containers WHERE chat_id = ?", (chat_id,)) @@ -74,8 +85,6 @@ def _apply_bot_reset(conn: Connection, e: Event) -> None: (bot_id,), ) # NOTE: bots row itself is preserved (identity, kickoff_prose intact). - # NOTE: "you" activity (entity_id="you") may linger from a deleted chat; - # acceptable for v1 — Phase 1.5 cleanup if needed. def get_bot(conn: Connection, bot_id: str) -> dict | None: diff --git a/tests/test_reset.py b/tests/test_reset.py index abfd835..797bce4 100644 --- a/tests/test_reset.py +++ b/tests/test_reset.py @@ -292,6 +292,189 @@ def test_reset_clears_guest_reference_in_other_chats(client, tmp_path): ).fetchone()[0] == 1 +def test_reset_purges_orphaned_you_activity_rows(client, tmp_path): + """T69: when a bot's chats are deleted, "you" activity rows tied to those + chats' containers should also be purged (otherwise they linger orphaned).""" + db = tmp_path / "test.db" + with open_db(db) as conn: + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "thoughtful", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "coworker", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="container_created", + payload={ + "chat_id": "chat_bot_a", + "name": "office", + "type": "workplace", + "properties": {}, + }, + ) + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": "you", + "container_id": 1, + "posture": "standing", + "action": {"verb": "watching"}, + }, + ) + project(conn) + # Sanity: the "you" activity row exists and points at the container. + assert conn.execute( + "SELECT COUNT(*) FROM activity WHERE entity_id = 'you'" + ).fetchone()[0] == 1 + + response = client.post( + "/bots/bot_a/reset", + data={"confirm_name": "BotA"}, + follow_redirects=False, + ) + assert response.status_code == 303 + + with open_db(db) as conn: + # The orphaned "you" activity row tied to bot_a's purged container is gone. + assert conn.execute( + "SELECT COUNT(*) FROM activity WHERE entity_id = 'you'" + ).fetchone()[0] == 0 + + +def test_reset_does_not_purge_you_activity_in_other_chats(client, tmp_path): + """T69: resetting bot_a must leave a "you" activity row pointing at + bot_b's container intact — only orphans from the reset bot's chats go.""" + db = tmp_path / "test.db" + with open_db(db) as conn: + # bot_a + its chat + container. + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "thoughtful", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "coworker", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="container_created", + payload={ + "chat_id": "chat_bot_a", + "name": "office", + "type": "workplace", + "properties": {}, + }, + ) + # bot_b + its chat + container. + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_b", + "name": "BotB", + "persona": "curious", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "friend", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_b", + "host_bot_id": "bot_b", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="container_created", + payload={ + "chat_id": "chat_bot_b", + "name": "kitchen", + "type": "home", + "properties": {}, + }, + ) + # The activity table is keyed on entity_id (PRIMARY KEY), so only one + # "you" row exists at a time. Point it at bot_b's container so reset of + # bot_a should NOT touch it. + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": "you", + "container_id": 2, # kitchen, in chat_bot_b + "posture": "sitting", + "action": {"verb": "reading"}, + }, + ) + project(conn) + # Sanity: the "you" activity row is in bot_b's container. + row = conn.execute( + "SELECT container_id FROM activity WHERE entity_id = 'you'" + ).fetchone() + assert row is not None and row[0] == 2 + + response = client.post( + "/bots/bot_a/reset", + data={"confirm_name": "BotA"}, + follow_redirects=False, + ) + assert response.status_code == 303 + + with open_db(db) as conn: + # The "you" activity in bot_b's container is preserved. + row = conn.execute( + "SELECT container_id FROM activity WHERE entity_id = 'you'" + ).fetchone() + assert row is not None + assert row[0] == 2 + + def test_reset_purges_guest_memories_from_other_chats(client, tmp_path): db = tmp_path / "test.db" _seed_two_bots_with_guest_link( -- 2.52.0 From 13c23fd8981e777099c5c9dc55bcbf1dfd59c12e Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:07:12 -0400 Subject: [PATCH 03/17] feat: LLM-merged group meta-summary (T70) --- chat/services/scene_summarize.py | 82 +++++++++++++-- tests/test_per_pov_summary.py | 175 ++++++++++++++++++++++++++++++- 2 files changed, 244 insertions(+), 13 deletions(-) diff --git a/chat/services/scene_summarize.py b/chat/services/scene_summarize.py index d57f7e2..2e74ddf 100644 --- a/chat/services/scene_summarize.py +++ b/chat/services/scene_summarize.py @@ -334,26 +334,92 @@ async def apply_scene_close_summary( timeout_s=timeout_s, ) - # Group node update: naive per-POV concat for v2. Only fires when - # both POVs ran (i.e. the guest is present) and a group_node row - # exists for this chat. + # Group node update: T70 runs a third classifier call to merge the + # two per-POV summaries into a coherent group-level view + a brief + # group-dynamic note. Falls back to the Phase 2 naive concat on + # classifier failure (see :func:`merge_group_summary`). Only fires + # when both POVs ran (i.e. the guest is present) and a group_node + # row exists for this chat. if guest_pov is not None and get_group_node(conn, chat_id) is not None: host_bot = get_bot(conn, host_bot_id) or {"name": host_bot_id} guest_bot = get_bot(conn, guest_bot_id) or {"name": guest_bot_id} host_name = host_bot.get("name", host_bot_id) or host_bot_id guest_name = guest_bot.get("name", guest_bot_id) or guest_bot_id - group_summary = ( - f"{host_name}: {host_pov.summary}\n\n" - f"{guest_name}: {guest_pov.summary}" + merged = await merge_group_summary( + client, + classifier_model=classifier_model, + host_name=host_name, + host_pov_summary=host_pov.summary, + guest_name=guest_name, + guest_pov_summary=guest_pov.summary, + timeout_s=timeout_s, ) append_and_apply( conn, kind="group_node_updated", payload={ "chat_id": chat_id, - "summary": group_summary, - "dynamic": "", + "summary": merged.summary, + "dynamic": merged.dynamic, }, ) return host_pov + + +class GroupMetaSummary(BaseModel): + """Classifier output: a merged group-level view of a closed scene. + + Defaults are an empty no-op so callers can use the schema's default + as a sentinel; in practice :func:`merge_group_summary` builds an + explicit naive-concat fallback rather than returning these defaults + directly so existing Phase 2 behavior is preserved on classifier + failure. + """ + + summary: str = "" + dynamic: str = "" + + +_GROUP_MERGE_SYSTEM = ( + "Given two per-POV scene summaries from a 3-entity scene (you + " + "host + guest), produce a coherent group-level summary capturing " + "the shared events as both witnesses experienced them, plus a " + "brief 'dynamic' note describing the trio's group dynamic during " + "the scene. Output strict JSON matching schema." +) + + +async def merge_group_summary( + client: LLMClient, + *, + classifier_model: str, + host_name: str, + host_pov_summary: str, + guest_name: str, + guest_pov_summary: str, + timeout_s: float = 30.0, +) -> GroupMetaSummary: + """Merge two per-POV scene summaries into a coherent group-level + summary + group-dynamic note. Falls back to the naive concat (the + existing behavior) on classifier failure.""" + user = ( + f"{host_name} (host) POV summary:\n{host_pov_summary}\n\n" + f"{guest_name} (guest) POV summary:\n{guest_pov_summary}" + ) + fallback = GroupMetaSummary( + summary=( + f"{host_name}: {host_pov_summary}\n\n" + f"{guest_name}: {guest_pov_summary}" + ), + dynamic="", + ) + return await classify( + client, + model=classifier_model, + system=_GROUP_MERGE_SYSTEM, + user=user, + schema=GroupMetaSummary, + default=fallback, + timeout_s=timeout_s, + ) diff --git a/tests/test_per_pov_summary.py b/tests/test_per_pov_summary.py index f33345a..c401ea8 100644 --- a/tests/test_per_pov_summary.py +++ b/tests/test_per_pov_summary.py @@ -636,8 +636,10 @@ async def test_close_with_guest_updates_both_edges(tmp_path): @pytest.mark.asyncio async def test_close_with_group_node_updates_group_summary(tmp_path): """When a group_node row exists, scene close emits group_node_updated - with a non-empty summary that mentions both bots' names (v2 naive - concat of per-POV summaries).""" + with a non-empty summary that mentions both bots' names. T70 swapped + the Phase 2 naive concat for an LLM-merged summary; this regression + test feeds bad-JSON merge responses so the helper falls back to the + original naive-concat shape, preserving the original assertions.""" db = tmp_path / "t.db" apply_migrations(db) import chat.state.group_node # noqa: F401 -- register handlers @@ -660,7 +662,11 @@ async def test_close_with_group_node_updates_group_summary(tmp_path): _seed_two_bot_scene(conn, with_group_node=True) project(conn) - client = MockLLMClient(canned=[host_canned, guest_canned]) + # 2 valid (host POV, guest POV) + 3 bad-JSON merge attempts -> + # merge_group_summary falls back to the naive concat default. + client = MockLLMClient( + canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"] + ) await apply_scene_close_summary( conn, client, @@ -675,8 +681,167 @@ async def test_close_with_group_node_updates_group_summary(tmp_path): gn = get_group_node(conn, "chat_bot_a") assert gn is not None assert gn["summary"] # non-empty - # Naive concat surfaces both bot names in the group summary. + # Naive-concat fallback surfaces both bot names in the group summary. assert "BotA" in gn["summary"] assert "BotB" in gn["summary"] - # Phase 2 v2 keeps dynamic empty (Phase 3 polishes). + # Naive-concat fallback keeps dynamic empty. assert gn["dynamic"] == "" + + +# --------------------------------------------------------------------------- +# T70: LLM-merged group meta-summary on scene close. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_group_summary_merges_per_pov_via_classifier_when_guest_present( + tmp_path, +): + """With a guest present and a group_node row, scene close runs the + merge classifier as a third call after the two per-POV summarize_scene + calls; its output drives the group_node summary + dynamic fields.""" + db = tmp_path / "t.db" + apply_migrations(db) + import chat.state.group_node # noqa: F401 -- register handlers + + host_canned = json.dumps( + { + "summary": "BotA appreciated the calm.", + "knowledge_facts": [], + "relationship_summary": "BotA felt steady.", + } + ) + guest_canned = json.dumps( + { + "summary": "BotB found the room friendly.", + "knowledge_facts": [], + "relationship_summary": "BotB warmed up.", + } + ) + merge_canned = json.dumps( + {"summary": "merged group view", "dynamic": "warm rapport"} + ) + with open_db(db) as conn: + _seed_two_bot_scene(conn, with_group_node=True) + project(conn) + + # Canned-queue layout matches the production call order in + # apply_scene_close_summary: host POV summarize_scene runs first, + # then guest POV summarize_scene, then merge_group_summary. + client = MockLLMClient( + canned=[host_canned, guest_canned, merge_canned] + ) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + # All three canned entries consumed -> classifier ran exactly 3x. + assert client._canned == [] + + from chat.state.group_node import get_group_node + + gn = get_group_node(conn, "chat_bot_a") + assert gn is not None + assert gn["summary"] == "merged group view" + assert gn["dynamic"] == "warm rapport" + + +@pytest.mark.asyncio +async def test_group_summary_falls_back_to_naive_concat_on_classifier_failure( + tmp_path, +): + """If the merge classifier flaps (bad JSON across all 3 retries), the + helper falls back to the original Phase 2 naive concat shape and + leaves dynamic empty.""" + db = tmp_path / "t.db" + apply_migrations(db) + import chat.state.group_node # noqa: F401 -- register handlers + + host_canned = json.dumps( + { + "summary": "BotA appreciated the calm.", + "knowledge_facts": [], + "relationship_summary": "BotA felt steady.", + } + ) + guest_canned = json.dumps( + { + "summary": "BotB found the room friendly.", + "knowledge_facts": [], + "relationship_summary": "BotB warmed up.", + } + ) + with open_db(db) as conn: + _seed_two_bot_scene(conn, with_group_node=True) + project(conn) + + # 2 valid POV summaries + 3 bad-JSON merge attempts trip the + # classifier's retry-then-default path; the default is the naive + # concat fallback. + client = MockLLMClient( + canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"] + ) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + from chat.state.group_node import get_group_node + + gn = get_group_node(conn, "chat_bot_a") + assert gn is not None + expected = ( + "BotA: BotA appreciated the calm.\n\n" + "BotB: BotB found the room friendly." + ) + assert gn["summary"] == expected + assert gn["dynamic"] == "" + + +@pytest.mark.asyncio +async def test_group_summary_skipped_when_no_guest(tmp_path): + """No-guest path: scene close does NOT invoke merge_group_summary + and emits no group_node_updated event. Confirms the existing + `if guest_bot_id is not None` gating at the call site.""" + db = tmp_path / "t.db" + apply_migrations(db) + canned = json.dumps( + { + "summary": "BotA helped you talk through the deadline anxiety.", + "knowledge_facts": ["Deadline next Friday."], + "relationship_summary": "BotA leaned in supportively.", + } + ) + with open_db(db) as conn: + _seed_single_bot_scene(conn) + project(conn) + + # Only 1 canned entry; if merge_group_summary were called the + # MockLLMClient would IndexError on the empty queue. + client = MockLLMClient(canned=[canned]) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + # Exactly the host POV call consumed, nothing else. + assert client._canned == [] + + # No group_node_updated event was emitted. + rows = conn.execute( + "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'" + ).fetchall() + assert rows == [] -- 2.52.0 From 428438b2233d55ddfb25c46c9d95af18cf48f239 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:11:20 -0400 Subject: [PATCH 04/17] fix: witness role parametric in prompt assembly (T71.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 T46 pinned the witness mask contract on search_memories with a witness_role parameter (host/guest/you). The prompt-assembly call site in assemble_narrative_prompt was hardcoded to "host", which silently returned the wrong rows when the speaker was the guest bot. Derive the witness role from chat membership via a new private helper _witness_role_for(speaker_bot_id, host_bot_id), and apply it at the search_memories call. Behaviour is identical when the speaker is the host (or when no guest is present); the fix is load-bearing only when the guest bot is the speaker — exactly the scenario Phase 2 T43 added support for. Tests: pin both directions (host-as-speaker and guest-as-speaker) by patching the imported search_memories reference and asserting the witness_role argument the call site emits. --- chat/services/prompt.py | 19 ++++++++++++- tests/test_prompt.py | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/chat/services/prompt.py b/chat/services/prompt.py index 48a0a7f..5ed4905 100644 --- a/chat/services/prompt.py +++ b/chat/services/prompt.py @@ -273,6 +273,18 @@ def _resolve_previous_scene_summary( return mem[0] +def _witness_role_for(speaker_bot_id: str, host_bot_id: str | None) -> str: + """Return the witness POV role for the speaker's memory query. + + The host bot of a chat queries memories with ``witness_role="host"``; + the guest bot queries with ``witness_role="guest"``. Phase 2 T46 + pinned the contract on ``search_memories``; this helper applies it + at the call site so a guest-as-speaker doesn't silently retrieve + memories under the wrong POV mask. + """ + return "host" if speaker_bot_id == host_bot_id else "guest" + + def _resolve_addressee( conn: Connection, addressee: str, you: dict | None ) -> tuple[str, str]: @@ -433,7 +445,12 @@ def assemble_narrative_prompt( memory_summaries = [] if query: try: - hits = search_memories(conn, speaker_bot_id, "host", query, k=4) + witness_role = _witness_role_for( + speaker_bot_id, chat.get("host_bot_id") + ) + hits = search_memories( + conn, speaker_bot_id, witness_role, query, k=4 + ) memory_summaries = [h["pov_summary"] for h in hits] except Exception: memory_summaries = [] diff --git a/tests/test_prompt.py b/tests/test_prompt.py index 2ef4641..d4fc6a5 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -452,6 +452,68 @@ def test_assemble_when_speaker_is_guest_orients_edges_correctly(tmp_path): assert "68/100" in body +def test_speaker_is_guest_uses_guest_witness_role(tmp_path, monkeypatch): + """T71.1: when the guest is the speaker, ``search_memories`` is + called with ``witness_role="guest"``, not the previously-hardcoded + ``"host"``. Pins the parametric witness role at the prompt call site + so guest-as-speaker honours the witness mask via Phase 2 T46. + """ + db = tmp_path / "t.db" + apply_migrations(db) + captured: dict = {} + + def _fake_search(conn, owner_id, witness_role, query, k=4): + captured["owner_id"] = owner_id + captured["witness_role"] = witness_role + captured["query"] = query + return [] + + # Patch the imported reference inside the prompt module so the + # production call site uses the fake. + import chat.services.prompt as prompt_mod + monkeypatch.setattr(prompt_mod, "search_memories", _fake_search) + + with open_db(db) as conn: + _seed_with_guest(conn) + # Guest as speaker — must request memories with witness_role="guest". + assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_b", + recent_dialogue=[], + # retrieved_memory_summaries=None forces the search path. + retrieved_memory_summaries=None, + ) + assert captured["owner_id"] == "bot_b" + assert captured["witness_role"] == "guest" + + +def test_speaker_is_host_uses_host_witness_role(tmp_path, monkeypatch): + """T71.1 (regression): host-as-speaker still queries with + ``witness_role="host"``.""" + db = tmp_path / "t.db" + apply_migrations(db) + captured: dict = {} + + def _fake_search(conn, owner_id, witness_role, query, k=4): + captured["witness_role"] = witness_role + return [] + + import chat.services.prompt as prompt_mod + monkeypatch.setattr(prompt_mod, "search_memories", _fake_search) + + with open_db(db) as conn: + _seed_with_guest(conn) + assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_a", # host as speaker + recent_dialogue=[], + retrieved_memory_summaries=None, + ) + assert captured["witness_role"] == "host" + + def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path): """Under tight budget MUST blocks survive but SHOULD-tier guest activity is dropped first.""" -- 2.52.0 From afd1a509586eac1e1a1c540ba81f72d7b951ccc8 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:13:24 -0400 Subject: [PATCH 05/17] refactor: single ACTIVITIES: block with bullet-level trim (T71.2) Phase 2 T43 added a SECOND ACTIVITIES: block to render guest activity separately from you+speaker. Two consecutive ACTIVITIES: headers can read as a duplicate-section bug to the LLM and bloat the prompt. Consolidate to a single ACTIVITIES: block whose body is composed from up to three bullets (you, speaker, guest). The block itself is MUST-tier (always renders); bullet-level trim drops bullets in the order guest -> group node -> you -> other edges, with the speaker bullet as the MUST-tier floor (the speaker's own current activity is the load-bearing slice). Implementation chose Option B from the polish plan: pre-truncate the bullets list at trim time before _build_activity_block runs, rather than introduce a granular tier mode in the trim machinery. Rationale documented in code; the existing block-level trim ladder gains a single new toggle (include_you_activity) and the SHOULD-tier guest_activity_block is gone. Tests: - test_single_activities_block_with_three_bullets_when_3_entities: exactly one ACTIVITIES: header with all three entity bullets. - test_tight_budget_drops_guest_activity_bullet_first: speaker bullet survives, guest bullet absent under tight budget. - Existing test_assemble_with_tight_budget_drops_guest_activity_first still passes (asserts on bullet absence, not block-header absence). --- chat/services/prompt.py | 132 ++++++++++++++++++++++++++++------------ tests/test_prompt.py | 60 ++++++++++++++++++ 2 files changed, 152 insertions(+), 40 deletions(-) diff --git a/chat/services/prompt.py b/chat/services/prompt.py index 5ed4905..c820136 100644 --- a/chat/services/prompt.py +++ b/chat/services/prompt.py @@ -368,34 +368,57 @@ def assemble_narrative_prompt( addressee_name, ) - # Activity for present entities. Core (MUST): you + speaker bot. - # Phase 2 (SHOULD-tier): when a third party (guest) is present in - # the chat, append their activity in a separate block so it can be - # trimmed independently under tight budget. - activities: list[dict] = [] + # Activity for present entities — single ACTIVITIES: block with up + # to three bullets (you, speaker, guest). The block itself is + # MUST-tier and survives all trims, but bullet-level trim drops + # bullets in the order guest -> you, keeping the speaker bullet + # (the speaker's own current activity is the load-bearing slice). + # + # T71.2 chose Option B from the polish plan: pre-truncate the + # bullets list at trim time before _build_activity_block runs, + # rather than introducing a granular tier mode in the trim + # machinery. The single-block render avoids the dual-ACTIVITIES: + # header that Phase 2 T43 introduced (read by some LLMs as a + # duplicate-section bug). + you_activity: dict | None = None you_act = get_activity(conn, "you") if you_act is not None: - you_act = dict(you_act) - you_act["_display_name"] = (you or {}).get("name") or "you" - activities.append(you_act) + you_activity = dict(you_act) + you_activity["_display_name"] = (you or {}).get("name") or "you" + + speaker_activity: dict | None = None bot_act = get_activity(conn, speaker_bot_id) if bot_act is not None: - bot_act = dict(bot_act) - bot_act["_display_name"] = bot["name"] - activities.append(bot_act) - activity_block = _build_activity_block(activities) + speaker_activity = dict(bot_act) + speaker_activity["_display_name"] = bot["name"] - # SHOULD-tier guest activity extension (Phase 2 / Task 43). - guest_activity_block: str | None = None + guest_activity: dict | None = None if guest_id is not None: guest_act = get_activity(conn, guest_id) if guest_act is not None: - guest_act = dict(guest_act) + guest_activity = dict(guest_act) guest_bot = get_bot(conn, guest_id) - guest_act["_display_name"] = ( + guest_activity["_display_name"] = ( guest_bot["name"] if guest_bot else guest_id ) - guest_activity_block = _build_activity_block([guest_act]) + + def _activity_block_for( + *, include_you: bool, include_guest: bool + ) -> str | None: + """Render the single ACTIVITIES: block with the requested bullets. + + Speaker bullet is always included (it's the MUST-tier baseline); + ``you`` and ``guest`` bullets are toggled by the caller during + trim. Returns None when no bullets remain. + """ + bullets: list[dict] = [] + if include_you and you_activity is not None: + bullets.append(you_activity) + if speaker_activity is not None: + bullets.append(speaker_activity) + if include_guest and guest_activity is not None: + bullets.append(guest_activity) + return _build_activity_block(bullets) # SHOULD-tier group-node block (Phase 2 / Task 43): rendered only # when the group_node row is present AND it covers all three of @@ -469,11 +492,18 @@ def assemble_narrative_prompt( last4 = dialogue_full[-4:] if dialogue_full else [] must_dialogue_block = _build_dialogue_block(last4, earlier_summary=None) + # MUST-tier ACTIVITIES floor: the speaker bullet alone (you and + # guest bullets are dropped first under bullet-level trim before + # the block bottoms out at speaker-only). + must_activity_block = _activity_block_for( + include_you=False, include_guest=False + ) + must_blocks: list[str | None] = [ speaker_identity, edge_to_addressee, scene_block, - activity_block, + must_activity_block, must_dialogue_block, closing, ] @@ -498,6 +528,7 @@ def assemble_narrative_prompt( include_previous_scene: bool, include_memories_top_k: int, dialogue_keep: int, + include_you_activity: bool = True, include_guest_activity: bool = True, include_group_node: bool = True, ) -> tuple[str, int, list[dict]]: @@ -520,13 +551,20 @@ def assemble_narrative_prompt( if include_previous_scene else None ) + # Single ACTIVITIES: block, bullet-level trim (T71.2). Guest + # bullet drops first, then the you bullet; speaker bullet is the + # MUST-tier floor and always present when an activity row exists. + activity_block = _activity_block_for( + include_you=include_you_activity, + include_guest=include_guest_activity, + ) + body = _join_blocks([ speaker_identity, edge_to_addressee, other_edges_block if include_other_edges else None, scene_block, activity_block, - guest_activity_block if include_guest_activity else None, group_node_block if include_group_node else None, prev_block, memories_block, @@ -544,16 +582,18 @@ def assemble_narrative_prompt( nice_memories_k = min(4, len(memory_summaries)) include_prev = previous_scene_summary is not None include_other = other_edges_block is not None - include_guest_activity = guest_activity_block is not None + include_you_activity = you_activity is not None + include_guest_activity = guest_activity is not None include_group_node = group_node_block is not None def _build(*, prev: bool, mem_k: int, dlg: int, other: bool, - guest_act: bool, group: bool) -> tuple[str, int]: + you_act: bool, guest_act: bool, group: bool) -> tuple[str, int]: body, total, _ = assemble( include_other_edges=other, include_previous_scene=prev, include_memories_top_k=mem_k, dialogue_keep=dlg, + include_you_activity=you_act, include_guest_activity=guest_act, include_group_node=group, ) @@ -561,8 +601,8 @@ def assemble_narrative_prompt( body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) # If under soft, we're done. @@ -575,8 +615,8 @@ def assemble_narrative_prompt( include_prev = False body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if total <= budget_soft: return _emit(body, user_turn_prose) @@ -585,8 +625,8 @@ def assemble_narrative_prompt( nice_memories_k = 2 body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if total <= budget_soft: return _emit(body, user_turn_prose) @@ -595,8 +635,8 @@ def assemble_narrative_prompt( nice_dialogue_keep = baseline_keep body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if total <= budget_soft: return _emit(body, user_turn_prose) @@ -606,35 +646,47 @@ def assemble_narrative_prompt( nice_memories_k = max(0, nice_memories_k - 1) body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) - # Drop SHOULD-tier blocks in order: guest activity → group node → - # other edges. (Guest activity goes first per Task 43 spec — it's - # the most expendable additive context.) + # Drop SHOULD-tier extras in order: + # 1. guest activity bullet (T71.2: bullet-level trim within the + # single ACTIVITIES: block — guest goes first per Task 43 spec) + # 2. group node block + # 3. you activity bullet (still SHOULD-tier; speaker bullet is the + # MUST-tier floor and never dropped) + # 4. other edges if include_guest_activity and total > budget_hard: include_guest_activity = False body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if include_group_node and total > budget_hard: include_group_node = False body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, + ) + + if include_you_activity and total > budget_hard: + include_you_activity = False + body, total = _build( + prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if include_other and total > budget_hard: include_other = False body, total = _build( prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep, - other=include_other, guest_act=include_guest_activity, - group=include_group_node, + other=include_other, you_act=include_you_activity, + guest_act=include_guest_activity, group=include_group_node, ) if total > budget_hard: diff --git a/tests/test_prompt.py b/tests/test_prompt.py index d4fc6a5..322ddab 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -514,6 +514,66 @@ def test_speaker_is_host_uses_host_witness_role(tmp_path, monkeypatch): assert captured["witness_role"] == "host" +def test_single_activities_block_with_three_bullets_when_3_entities(tmp_path): + """T71.2: with you + host + guest present, the assembled prompt + contains exactly ONE ``ACTIVITIES:`` header and bullets for all + three entities (no duplicate header from the prior dual-block + rendering). + """ + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_with_guest(conn) + msgs = assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_a", + recent_dialogue=[], + retrieved_memory_summaries=[], + ) + body = msgs[0].content + # Exactly one ACTIVITIES: header. + assert body.count("ACTIVITIES:") == 1 + # Bullets for all three entities (you=Sam, host=Aria, guest=Iris) + # — pin on the unique action verbs from the seed data. + assert "finishing emails" in body # you bullet + assert "pretending to work" in body # speaker (host) bullet + assert "smirking-distinctively" in body # guest bullet + + +def test_tight_budget_drops_guest_activity_bullet_first(tmp_path): + """T71.2: under tight budget the speaker bullet survives but the + guest activity bullet is the first ACTIVITIES: bullet to drop. The + block as a whole stays present (it's MUST-tier); only its body + contracts. + """ + db = tmp_path / "t.db" + apply_migrations(db) + with open_db(db) as conn: + _seed_with_guest(conn) + dialogue = [ + {"speaker": "you", "text": "line-16 hi there"}, + {"speaker": "bot_a", "text": "line-17 hey"}, + {"speaker": "you", "text": "line-18 quiet night"}, + {"speaker": "bot_a", "text": "line-19 indeed"}, + ] + msgs = assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_a", + recent_dialogue=dialogue, + retrieved_memory_summaries=[], + budget_soft=250, + budget_hard=340, + ) + body = msgs[0].content + # Speaker bullet survives (MUST-tier floor). + assert "pretending to work" in body + assert "ACTIVITIES:" in body + # Guest bullet is dropped first under budget pressure. + assert "smirking-distinctively" not in body + + def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path): """Under tight budget MUST blocks survive but SHOULD-tier guest activity is dropped first.""" -- 2.52.0 From 73bb8c1f17201dacdd423cba407484528e46b9db Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:16:02 -0400 Subject: [PATCH 06/17] chore: document NICE trim order rationale (T71.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T18 review (Phase 1) noted the NICE-tier trim drops previous-scene FIRST while §6.3 spec lists previous-scene LAST in the NICE tier group. Decision: keep the existing greedy order (previous-scene first), and document why. Rationale (now in code at the trim ladder): 1. Cheapest-impact-first — a per-POV previous-scene summary loses less narrative continuity than the older dialogue turns or memory hits it competes with. 2. Greedy lookahead is more expensive than the marginal narrative loss. Dropping previous-scene typically clears the soft-budget slack in one step. Test added: test_nice_trim_order_documented pins the observed order (previous-scene -> memories -> dialogue) so a future refactor can't silently invert it. Sized so that all-NICE config overflows soft but dropping just previous-scene fits — proves memories and older dialogue turns survive while previous-scene is the FIRST drop. --- chat/services/prompt.py | 20 ++++++ tests/test_prompt.py | 145 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) diff --git a/chat/services/prompt.py b/chat/services/prompt.py index c820136..6e6d72c 100644 --- a/chat/services/prompt.py +++ b/chat/services/prompt.py @@ -611,6 +611,26 @@ def assemble_narrative_prompt( # Drop NICE in order: previous scene → memories beyond top-2 → # older dialogue turns (collapse to 4). + # + # T71.3 — order rationale: the §6.3 spec lists NICE-tier members + # with previous-scene LAST, which read as a literal trim order + # during T18 review. We deliberately keep the greedy order shown + # here (previous-scene FIRST) for two reasons: + # + # 1. Cheapest-impact-first: a per-POV previous-scene summary is + # a single short paragraph that loses very little narrative + # continuity when dropped, while the older dialogue turns it + # is competing with carry the speaker's last few beats — those + # ground the next response far more concretely. + # 2. Greedy lookahead is more expensive than the marginal + # narrative loss. Dropping previous-scene typically clears + # the soft-budget slack in one step; trying memories or + # dialogue first would routinely require multiple recompute + # passes through the assembler. + # + # The pin test test_nice_trim_order_documented locks this order so + # a future refactor can't quietly invert it without surfacing the + # decision. if include_prev: include_prev = False body, total = _build( diff --git a/tests/test_prompt.py b/tests/test_prompt.py index 322ddab..f50fdea 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -574,6 +574,151 @@ def test_tight_budget_drops_guest_activity_bullet_first(tmp_path): assert "smirking-distinctively" not in body +def test_nice_trim_order_documented(tmp_path): + """T71.3: pin the NICE-tier trim order so a future refactor can't + quietly invert it. + + Order under NICE pressure is: + 1. previous-scene summary (dropped FIRST) + 2. memories beyond top-2 + 3. older dialogue turns (collapsed to last-4) + + We size the budget so that all-NICE-included is over soft, but + dropping ONLY previous-scene gets us back under soft. The observed + behaviour we pin: previous-scene gone, memories/dialogue intact. + """ + db = tmp_path / "t.db" + apply_migrations(db) + # Heavy previous-scene summary — large enough that dropping it + # alone clears the soft-budget overage. Defined out here so the + # marker is in scope for the assertions below. + prev_scene_blob = "PREVSCENE-MARKER " + ("filler " * 200) + with open_db(db) as conn: + # Append all events first, project once at the end (project is + # not idempotent — it replays every event in the log). + from chat.eventlog.log import append_event as _append + _append(conn, kind="bot_authored", payload={ + "id": "bot_a", + "name": "Aria", + "persona": "reserved coworker who notices things", + "voice_samples": ["I — sorry, I didn't mean to."], + "traits": ["introverted"], + "backstory": "An archivist who joined the firm last spring.", + "initial_relationship_to_you": "coworker", + "kickoff_prose": "you stay late at the office", + }) + _append(conn, kind="you_authored", payload={ + "name": "Sam", + "pronouns": "they/them", + "persona": "tired analyst", + }) + _append(conn, kind="chat_created", payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "guest_bot_id": None, + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1 evening", + "weather": "clear", + }) + _append(conn, kind="container_created", payload={ + "chat_id": "chat_bot_a", + "name": "office bullpen", + "type": "workplace", + "properties": {"public": False, "moving": False, "audible_range": "room"}, + }) + _append(conn, kind="edge_update", payload={ + "source_id": "bot_a", + "target_id": "you", + "affinity_delta": 12, + "trust_delta": 5, + "knowledge_facts": ["they work on the same floor"], + }) + _append(conn, kind="activity_change", payload={ + "entity_id": "you", + "container_id": 1, + "posture": "sitting at your desk", + "action": {"verb": "finishing emails"}, + "attention": "the screen", + }) + _append(conn, kind="activity_change", payload={ + "entity_id": "bot_a", + "container_id": 1, + "posture": "sitting at her desk", + "action": {"verb": "pretending to work"}, + "attention": "you, in glances", + }) + _append(conn, kind="scene_opened", payload={ + "chat_id": "chat_bot_a", + "container_id": 1, + "started_at": "2026-04-26T20:00:00+00:00", + "participants": ["you", "bot_a"], + }) + # Close the seeded scene and write a per-POV summary memory so + # _resolve_previous_scene_summary returns a non-empty string. + _append(conn, kind="scene_closed", payload={ + "scene_id": 1, + "ended_at": "2026-04-26T20:30:00+00:00", + "significance": 2, + }) + _append(conn, kind="memory_written", payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "scene_id": 1, + "pov_summary": prev_scene_blob, + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "direct", + "reliability": 1.0, + "significance": 2, + }) + project(conn) + + # Six dialogue turns — last 4 plus 2 older. If older turns are + # dropped under NICE pressure, the unique markers for turns 0/1 + # disappear; we'll assert they REMAIN to prove dialogue trim + # didn't fire. + dialogue = [ + {"speaker": "you", "text": "DLG-OLD-00 hello"}, + {"speaker": "bot_a", "text": "DLG-OLD-01 hi"}, + {"speaker": "you", "text": "DLG-LAST-16 ok"}, + {"speaker": "bot_a", "text": "DLG-LAST-17 sure"}, + {"speaker": "you", "text": "DLG-LAST-18 night"}, + {"speaker": "bot_a", "text": "DLG-LAST-19 indeed"}, + ] + # Four small memories — if "memories beyond top-2" trim fires, + # MEM-C/MEM-D disappear; we'll assert they REMAIN to prove + # memories trim didn't fire either. + memories = ["MEM-A short", "MEM-B short", "MEM-C short", "MEM-D short"] + + # Soft tuned so the all-NICE config (with the heavy previous + # scene summary) overflows, but dropping just previous-scene + # fits comfortably. Hard set high so SHOULD-tier never trims. + msgs = assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_a", + recent_dialogue=dialogue, + retrieved_memory_summaries=memories, + budget_soft=400, + budget_hard=8000, + ) + body = msgs[0].content + # Previous-scene summary was the FIRST NICE drop — its unique + # marker must be absent. + assert "PREVSCENE-MARKER" not in body + # Memories beyond top-2 stayed (proves memories trim did NOT fire). + assert "MEM-A" in body + assert "MEM-B" in body + assert "MEM-C" in body + assert "MEM-D" in body + # Older dialogue turns stayed (proves dialogue trim did NOT fire). + assert "DLG-OLD-00" in body + assert "DLG-OLD-01" in body + # Last-4 dialogue turns of course present. + assert "DLG-LAST-19" in body + + def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path): """Under tight budget MUST blocks survive but SHOULD-tier guest activity is dropped first.""" -- 2.52.0 From 21404a373bf20d2c75a5ccf0fb607f49a1198e0d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:24:24 -0400 Subject: [PATCH 07/17] feat: drawer edits for edge_trust / edge_summary / memory_pov_summary / knowledge_facts (T72.1) Adds the four POST routes whose state-layer support was already dispatched by the manual_edit projector (edge_trust, edge_summary, memory_pov_summary) plus a new edge_knowledge_fact dispatch branch for add/remove fact list manipulation. Drawer template gains editable textareas, sliders, and add/remove fact controls. Remove semantics on knowledge_fact match by string (not index) so concurrent edge_update events appending facts between drawer renders don't desync the form. --- chat/state/manual_edit.py | 41 +++- chat/templates/_drawer.html | 100 ++++++++- chat/web/drawer.py | 235 +++++++++++++++++++- tests/test_drawer_edits_extended.py | 327 ++++++++++++++++++++++++++++ 4 files changed, 685 insertions(+), 18 deletions(-) create mode 100644 tests/test_drawer_edits_extended.py diff --git a/chat/state/manual_edit.py b/chat/state/manual_edit.py index e796a7a..57e6c3b 100644 --- a/chat/state/manual_edit.py +++ b/chat/state/manual_edit.py @@ -6,7 +6,7 @@ be reversed by emitting an inverse ``manual_edit`` later. This module applies the new value to the appropriate target table; the snapshot of ``prior_value`` is taken by the route handler before this fires. -Phase 1 covers four target kinds: +Phase 1 covers five target kinds: - ``edge_affinity`` and ``edge_trust`` — slider edits on a specific edge, clamped to 0..100. - ``memory_significance`` — dropdown edit, clamped to 0..3. @@ -17,8 +17,12 @@ Phase 1 covers four target kinds: field. Driven by T27 from the classifier's ``relationship_summary`` output combined with the prior summary. -Other §6.4 editable fields (activity verb / attention / posture, -knowledge_facts list manipulation) are deferred to Phase 1.5. +T72.1 (Phase 2.5) adds one list-shaped edit: +- ``edge_knowledge_fact`` — add/remove a single fact on an edge's + ``knowledge_json`` list. Payload carries an ``action`` of ``"add"`` or + ``"remove"`` and a ``fact`` string; remove matches the first occurrence + by string equality so the route handler doesn't have to track fact + indices across re-renders. Pin toggles intentionally use the existing ``memory_pin_changed`` event (registered in :mod:`chat.state.memory`) rather than ``manual_edit`` so @@ -27,6 +31,7 @@ the projection writes both ``pinned`` and ``auto_pinned`` atomically. from __future__ import annotations +import json from sqlite3 import Connection from chat.eventlog.log import Event @@ -87,5 +92,33 @@ def _apply_manual_edit(conn: Connection, e: Event) -> None: target_id["target_id"], ), ) + elif kind == "edge_knowledge_fact": + # T72.1: add or remove a single fact on an edge's knowledge list. + # ``target_id`` is the {"source_id", "target_id"} edge pair; + # ``new_value`` carries ``{"action": "add"|"remove", "fact": str}``. + # Remove matches by string equality (first occurrence) so callers + # don't have to thread a fact_index through re-rendered drawers. + action = new_value["action"] + fact = str(new_value["fact"]) + row = conn.execute( + "SELECT knowledge_json FROM edges " + "WHERE source_id = ? AND target_id = ?", + (target_id["source_id"], target_id["target_id"]), + ).fetchone() + if row is not None: + knowledge = json.loads(row[0]) + if action == "add": + knowledge.append(fact) + elif action == "remove" and fact in knowledge: + knowledge.remove(fact) + conn.execute( + "UPDATE edges SET knowledge_json = ? " + "WHERE source_id = ? AND target_id = ?", + ( + json.dumps(knowledge), + target_id["source_id"], + target_id["target_id"], + ), + ) # Unknown target_kind: silently no-op for v1. Future kinds (activity - # fields, knowledge_facts list manipulation) extend the dispatch above. + # fields, etc.) extend the dispatch above. diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 6bfb887..b6d2033 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -156,19 +156,95 @@ - {% if edge_b2y.summary %}

{{ edge_b2y.summary }}

{% endif %} - {% if edge_b2y.knowledge %} -
Knowledge ({{ edge_b2y.knowledge|length }}) -
    {% for fact in edge_b2y.knowledge %}
  • {{ fact }}
  • {% endfor %}
-
- {% endif %} +
+ + + + +
+
+ + + + +
+
+ Knowledge ({{ (edge_b2y.knowledge or [])|length }}) + {% if edge_b2y.knowledge %} +
    + {% for fact in edge_b2y.knowledge %} +
  • + {{ fact }} +
    + + + + + +
    +
  • + {% endfor %} +
+ {% endif %} +
+ + + + + +
+
{% endif %} {% if edge_y2b %}
you → {{ host_bot.name }}

Affinity: {{ edge_y2b.affinity }}/100 · Trust: {{ edge_y2b.trust }}/100

- {% if edge_y2b.summary %}

{{ edge_y2b.summary }}

{% endif %} +
+ + + + +
+
+ + + + +
{% endif %} {% if not edge_b2y and not edge_y2b %} @@ -224,6 +300,16 @@ +
+ Edit POV summary +
+ + + +
+
{% endfor %} diff --git a/chat/web/drawer.py b/chat/web/drawer.py index d38d850..341dce7 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -1,4 +1,4 @@ -"""Chat drawer — read view (T24) and inline edits (T25). +"""Chat drawer — read view (T24) and inline edits (T25, T72). The GET endpoint renders an HTML partial showing the current scene + container, per-entity activity, host <-> you edges, pinned memories with @@ -13,14 +13,16 @@ returning the refreshed drawer partial so HTMX can swap it in: * pin toggle on a memory (emits ``memory_pin_changed`` with ``auto_pinned=0`` so a manual pin is not subject to auto-eviction). +T72 (Phase 2.5) extends the inline-edit set to cover the remaining +§6.4 editable fields whose state-layer support already lands in the +``manual_edit`` projector: edge trust slider, edge summary textarea, +memory POV summary textarea, and per-edge knowledge-fact add/remove. It +also exposes a witness-flag toggle (``you/host/guest``) per memory row +and a "first-meeting gate" on the Add-guest form so an existing edge +isn't quietly overwritten by a re-seed. + Each ``manual_edit`` payload snapshots the prior value alongside the new one so a later inverse edit can restore state (§6.4 final paragraph). - -Other §6.4 editable fields (activity verb/attention/posture, edge_trust, -edge summary, knowledge_facts list, memory pov_summary) are deferred to -a Phase 1.5 follow-up — the dispatch in :mod:`chat.state.manual_edit` -already accepts more ``target_kind`` values, so adding their routes is a -mechanical extension. """ from __future__ import annotations @@ -55,6 +57,13 @@ PIN_CAP = 8 # Recent-memories list is bounded to keep the drawer cheap to render. RECENT_LIMIT = 10 +# T72.1 caps on free-form textarea edits. Edge summaries and per-POV +# memory summaries are drawer-driven prose — bound them so a stray paste +# can't blow up the projected row size or the SSE drawer refresh payload. +EDGE_SUMMARY_MAX = 2000 +MEMORY_POV_SUMMARY_MAX = 2000 +KNOWLEDGE_FACT_MAX = 500 + @router.get("/chats/{chat_id}/drawer", response_class=HTMLResponse) async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): @@ -342,6 +351,218 @@ async def toggle_memory_pin( return await drawer(chat_id, request, conn) +# --- T72.1 deferred v1 drawer edits -------------------------------------- +# +# These four endpoints round out the §6.4 editable surface — the +# ``manual_edit`` projector already dispatches ``edge_trust``, +# ``edge_summary``, and ``memory_pov_summary`` (T25); ``edge_knowledge_fact`` +# is a new dispatch branch added alongside this commit. Each route follows +# the T25 pattern: snapshot the prior value, append + apply ``manual_edit``, +# then re-render the drawer partial. + + +@router.post( + "/chats/{chat_id}/drawer/edge/trust", + response_class=HTMLResponse, +) +async def edit_edge_trust( + chat_id: str, + request: Request, + source_id: str = Form(...), + target_id: str = Form(...), + new_value: int = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + if not 0 <= int(new_value) <= 100: + raise HTTPException( + status_code=400, + detail=f"trust must be in [0, 100], got {new_value}", + ) + + edge = get_edge(conn, source_id, target_id) + if edge is None: + raise HTTPException( + status_code=404, + detail=f"edge not found: {source_id}->{target_id}", + ) + + prior = int(edge["trust"]) + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "edge_trust", + "target_id": {"source_id": source_id, "target_id": target_id}, + "prior_value": prior, + "new_value": int(new_value), + }, + ) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/edge/summary", + response_class=HTMLResponse, +) +async def edit_edge_summary( + chat_id: str, + request: Request, + source_id: str = Form(...), + target_id: str = Form(...), + new_summary: str = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + if len(new_summary) > EDGE_SUMMARY_MAX: + raise HTTPException( + status_code=400, + detail=( + f"edge summary exceeds {EDGE_SUMMARY_MAX} chars " + f"(got {len(new_summary)})" + ), + ) + + edge = get_edge(conn, source_id, target_id) + if edge is None: + raise HTTPException( + status_code=404, + detail=f"edge not found: {source_id}->{target_id}", + ) + + prior = edge.get("summary") or "" + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "edge_summary", + "target_id": {"source_id": source_id, "target_id": target_id}, + "prior_value": prior, + "new_value": new_summary, + }, + ) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/memory/pov-summary", + response_class=HTMLResponse, +) +async def edit_memory_pov_summary( + chat_id: str, + request: Request, + memory_id: int = Form(...), + new_summary: str = Form(...), + conn=Depends(get_conn), +): + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + if len(new_summary) > MEMORY_POV_SUMMARY_MAX: + raise HTTPException( + status_code=400, + detail=( + f"memory pov_summary exceeds {MEMORY_POV_SUMMARY_MAX} chars " + f"(got {len(new_summary)})" + ), + ) + + # 404 when the memory either doesn't exist or belongs to a different + # chat — the drawer never surfaces cross-chat memories so editing one + # would be a path-traversal-style mistake. + row = conn.execute( + "SELECT pov_summary FROM memories WHERE id = ? AND chat_id = ?", + (int(memory_id), chat_id), + ).fetchone() + if row is None: + raise HTTPException( + status_code=404, + detail=f"memory not found in chat: {memory_id}", + ) + + prior = row[0] or "" + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "memory_pov_summary", + "target_id": int(memory_id), + "prior_value": prior, + "new_value": new_summary, + }, + ) + return await drawer(chat_id, request, conn) + + +@router.post( + "/chats/{chat_id}/drawer/edge/knowledge-facts", + response_class=HTMLResponse, +) +async def edit_edge_knowledge_facts( + chat_id: str, + request: Request, + source_id: str = Form(...), + target_id: str = Form(...), + action: str = Form(...), + fact: str = Form(...), + conn=Depends(get_conn), +): + """Add or remove a single knowledge_fact on an edge. + + Remove semantics are by string match (first occurrence) — the drawer + re-renders after every edit so threading a stable index through is + fragile when concurrent ``edge_update`` events can append more facts + between renders. The projector is a no-op when the fact isn't found, + keeping the route idempotent for stale form submissions. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + if action not in ("add", "remove"): + raise HTTPException( + status_code=400, + detail=f"action must be 'add' or 'remove', got {action!r}", + ) + + if len(fact) > KNOWLEDGE_FACT_MAX: + raise HTTPException( + status_code=400, + detail=( + f"fact exceeds {KNOWLEDGE_FACT_MAX} chars (got {len(fact)})" + ), + ) + if not fact.strip(): + raise HTTPException(status_code=400, detail="fact must not be empty") + + edge = get_edge(conn, source_id, target_id) + if edge is None: + raise HTTPException( + status_code=404, + detail=f"edge not found: {source_id}->{target_id}", + ) + + prior = list(edge.get("knowledge") or []) + append_and_apply( + conn, + kind="manual_edit", + payload={ + "target_kind": "edge_knowledge_fact", + "target_id": {"source_id": source_id, "target_id": target_id}, + "prior_value": prior, + "new_value": {"action": action, "fact": fact}, + }, + ) + return await drawer(chat_id, request, conn) + + # --- T42 guest add/remove ------------------------------------------------- # # Adding a guest fans out into up to four events: a ``guest_added`` to flip diff --git a/tests/test_drawer_edits_extended.py b/tests/test_drawer_edits_extended.py new file mode 100644 index 0000000..670ead6 --- /dev/null +++ b/tests/test_drawer_edits_extended.py @@ -0,0 +1,327 @@ +"""T72.1: deferred v1 drawer edits. + +T25 shipped affinity / significance / pin. T72.1 fills in the rest of the +§6.4 editable surface whose ``manual_edit`` projector dispatch was already +in place (or, for ``edge_knowledge_fact``, added alongside the route): + +* ``POST /chats/{chat_id}/drawer/edge/trust`` — slider 0..100. +* ``POST /chats/{chat_id}/drawer/edge/summary`` — textarea, capped 2000. +* ``POST /chats/{chat_id}/drawer/memory/pov-summary`` — textarea, capped. +* ``POST /chats/{chat_id}/drawer/edge/knowledge-facts`` — add/remove fact. + +Each test asserts (a) the ``manual_edit`` event lands in the log, +(b) the projected table reflects the new value, and (c) the response is +the refreshed drawer partial. + +T72.3's witness-flag tests extend this file with the inline-edit pair. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_event +from chat.eventlog.projector import project + + +@pytest.fixture +def client(tmp_path, monkeypatch): + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + db = tmp_path / "test.db" + monkeypatch.setenv("CHAT_DB_PATH", str(db)) + with TestClient(app) as c: + if hasattr(app.state, "background_worker"): + app.state.background_worker.enabled = False + yield c + + +def _seed(db: Path) -> None: + """Seed a chat with one host bot, one host->you edge with a fact and + summary already set, and one memory authored by ``bot_a`` witnessed by + all three roles. Tests reach into projected state to verify edits. + """ + with open_db(db) as conn: + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "...", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + # Materialise edge bot_a -> you with a knowledge_fact already on it + # so the remove path has something to consume. + append_event( + conn, + kind="edge_update", + payload={ + "source_id": "bot_a", + "target_id": "you", + "chat_id": "chat_bot_a", + "affinity_delta": 0, + "knowledge_facts": ["studied physics together"], + }, + ) + append_event( + conn, + kind="memory_written", + payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "pov_summary": "Original summary text.", + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "significance": 1, + }, + ) + project(conn) + + +# --- T72.1 tests ---------------------------------------------------------- + + +def test_edit_edge_trust_emits_manual_edit_and_updates(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/trust", + data={"source_id": "bot_a", "target_id": "you", "new_value": "73"}, + ) + assert response.status_code == 200 + # Refresh shows the new trust value somewhere in the partial. + assert "73" in response.text + + with open_db(tmp_path / "test.db") as conn: + rows = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'" + ).fetchall() + assert len(rows) == 1 + payload = json.loads(rows[0][0]) + assert payload["target_kind"] == "edge_trust" + assert payload["prior_value"] == 50 + assert payload["new_value"] == 73 + assert payload["target_id"] == { + "source_id": "bot_a", + "target_id": "you", + } + + from chat.state.edges import get_edge + + edge = get_edge(conn, "bot_a", "you") + assert edge["trust"] == 73 + + +def test_edit_edge_trust_400_on_out_of_range(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/trust", + data={"source_id": "bot_a", "target_id": "you", "new_value": "150"}, + ) + assert response.status_code == 400 + + +def test_edit_edge_summary_emits_manual_edit_and_updates(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/summary", + data={ + "source_id": "bot_a", + "target_id": "you", + "new_summary": "BotA respects you and shares lab notes.", + }, + ) + assert response.status_code == 200 + + with open_db(tmp_path / "test.db") as conn: + rows = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'" + ).fetchall() + assert len(rows) == 1 + payload = json.loads(rows[0][0]) + assert payload["target_kind"] == "edge_summary" + assert payload["new_value"].startswith("BotA respects") + assert payload["target_id"] == { + "source_id": "bot_a", + "target_id": "you", + } + + summary = conn.execute( + "SELECT summary FROM edges " + "WHERE source_id = ? AND target_id = ?", + ("bot_a", "you"), + ).fetchone()[0] + assert "respects" in summary + + # And the refreshed partial echoes the new summary back. + assert "respects" in response.text + + +def test_edit_edge_summary_400_on_overflow(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/summary", + data={ + "source_id": "bot_a", + "target_id": "you", + "new_summary": "x" * 2001, + }, + ) + assert response.status_code == 400 + + +def test_edit_memory_pov_summary_emits_manual_edit_and_updates( + client, tmp_path +): + _seed(tmp_path / "test.db") + with open_db(tmp_path / "test.db") as conn: + memory_id = conn.execute("SELECT id FROM memories LIMIT 1").fetchone()[0] + + response = client.post( + "/chats/chat_bot_a/drawer/memory/pov-summary", + data={ + "memory_id": str(memory_id), + "new_summary": "Cleaner per-POV restatement of the moment.", + }, + ) + assert response.status_code == 200 + + with open_db(tmp_path / "test.db") as conn: + rows = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'" + ).fetchall() + assert len(rows) == 1 + payload = json.loads(rows[0][0]) + assert payload["target_kind"] == "memory_pov_summary" + assert payload["prior_value"] == "Original summary text." + assert payload["new_value"].startswith("Cleaner per-POV") + assert payload["target_id"] == memory_id + + pov = conn.execute( + "SELECT pov_summary FROM memories WHERE id = ?", (memory_id,) + ).fetchone()[0] + assert pov.startswith("Cleaner per-POV") + + assert "Cleaner per-POV" in response.text + + +def test_edit_memory_pov_summary_404_when_wrong_chat(client, tmp_path): + _seed(tmp_path / "test.db") + with open_db(tmp_path / "test.db") as conn: + memory_id = conn.execute("SELECT id FROM memories LIMIT 1").fetchone()[0] + # Re-home the memory to a different chat to confirm the route's + # cross-chat guard fires. + conn.execute( + "UPDATE memories SET chat_id = 'other_chat' WHERE id = ?", + (memory_id,), + ) + conn.commit() + + response = client.post( + "/chats/chat_bot_a/drawer/memory/pov-summary", + data={"memory_id": str(memory_id), "new_summary": "..."}, + ) + assert response.status_code == 404 + + +def test_edit_edge_knowledge_facts_add_emits_event_and_appends(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/knowledge-facts", + data={ + "source_id": "bot_a", + "target_id": "you", + "action": "add", + "fact": "lent you a textbook", + }, + ) + assert response.status_code == 200 + + with open_db(tmp_path / "test.db") as conn: + rows = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'" + ).fetchall() + assert len(rows) == 1 + payload = json.loads(rows[0][0]) + assert payload["target_kind"] == "edge_knowledge_fact" + assert payload["new_value"] == { + "action": "add", + "fact": "lent you a textbook", + } + # Prior value snapshots the entire knowledge list before the edit. + assert payload["prior_value"] == ["studied physics together"] + + from chat.state.edges import get_edge + + edge = get_edge(conn, "bot_a", "you") + assert "lent you a textbook" in edge["knowledge"] + assert "studied physics together" in edge["knowledge"] + + assert "lent you a textbook" in response.text + + +def test_edit_edge_knowledge_facts_remove_drops_matching_fact(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/knowledge-facts", + data={ + "source_id": "bot_a", + "target_id": "you", + "action": "remove", + "fact": "studied physics together", + }, + ) + assert response.status_code == 200 + + with open_db(tmp_path / "test.db") as conn: + from chat.state.edges import get_edge + + edge = get_edge(conn, "bot_a", "you") + assert "studied physics together" not in edge["knowledge"] + + rows = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'" + ).fetchall() + payload = json.loads(rows[0][0]) + assert payload["target_kind"] == "edge_knowledge_fact" + assert payload["new_value"]["action"] == "remove" + + +def test_edit_edge_knowledge_facts_400_on_bad_action(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/drawer/edge/knowledge-facts", + data={ + "source_id": "bot_a", + "target_id": "you", + "action": "delete", + "fact": "x", + }, + ) + assert response.status_code == 400 + + -- 2.52.0 From c265e4ce0f1e1b69d385e4cfc1339e66288bcd5e Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:26:31 -0400 Subject: [PATCH 08/17] feat: first-meeting gate on drawer Add-guest form (T72.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a host->candidate edge already exists from a prior chat, the Add-guest form renders the prose textarea disabled with an "already know each other" note. Submission without the explicit "re-seed anyway" toggle skips seed_inter_bot_edges so existing edge content (affinity, trust, knowledge, summaries) survives — guest_added and group_node_initialized still fire. A small inline script enables / disables the textarea per-option based on a pre-computed existing_guest_edges dict surfaced by the GET handler. --- chat/templates/_drawer.html | 53 +++++++++++- chat/web/drawer.py | 47 ++++++++--- tests/test_drawer_guest.py | 156 ++++++++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+), 14 deletions(-) diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index b6d2033..228a440 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -100,24 +100,71 @@

Add guest

{% if available_guests %} -
+

+ they already know each other (edge exists from a prior chat) +

+
+ {% else %}

No other bots authored yet.

{% endif %} diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 341dce7..a2928ff 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -113,6 +113,14 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): available_guests = [ b for b in list_bots(conn) if b["id"] != chat["host_bot_id"] ] + # T72.2 first-meeting gate: pre-compute whether a host->candidate edge + # already exists. Template renders the prose textarea disabled and the + # POST handler skips ``seed_inter_bot_edges`` (preserving the existing + # edge content) unless the user explicitly toggles "re-seed anyway". + existing_guest_edges = { + b["id"]: get_edge(conn, chat["host_bot_id"], b["id"]) is not None + for b in available_guests + } group_node = get_group_node(conn, chat_id) # Recent memories from host's POV (witness_host = 1), most recent first. @@ -161,6 +169,7 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): "edge_y2g": edge_y2g, "edge_g2y": edge_g2y, "available_guests": available_guests, + "existing_guest_edges": existing_guest_edges, "group_node": group_node, "recent_memories": recent_memories, "pinned": pinned, @@ -602,6 +611,7 @@ async def add_guest( request: Request, guest_bot_id: str = Form(...), relationship_prose: str = Form(""), + reseed: str = Form(""), conn=Depends(get_conn), client=Depends(get_llm_client), ): @@ -633,17 +643,32 @@ async def add_guest( detail=f"host bot not found: {chat['host_bot_id']}", ) - settings = request.app.state.settings - seed = await seed_inter_bot_edges( - client, - classifier_model=settings.classifier_model, - bot_a_id=chat["host_bot_id"], - bot_a_name=host_bot["name"], - bot_b_id=guest_bot_id, - bot_b_name=guest_bot["name"], - relationship_prose=relationship_prose, - timeout_s=settings.classifier_timeout_s, + # T72.2 first-meeting gate: when an edge already exists from a prior + # chat, the textarea is rendered disabled. Submission without the + # explicit "re-seed anyway" toggle skips ``seed_inter_bot_edges`` + # entirely so the existing edge content (affinity, trust, knowledge, + # summaries) survives. ``guest_added`` and ``group_node_initialized`` + # still fire so the chat picks up the new participant. + existing_edge = ( + get_edge(conn, chat["host_bot_id"], guest_bot_id) is not None ) + reseed_requested = reseed.lower() in ("1", "true", "on", "yes") + skip_seed = existing_edge and not reseed_requested + + settings = request.app.state.settings + if skip_seed: + seed = None + else: + seed = await seed_inter_bot_edges( + client, + classifier_model=settings.classifier_model, + bot_a_id=chat["host_bot_id"], + bot_a_name=host_bot["name"], + bot_b_id=guest_bot_id, + bot_b_name=guest_bot["name"], + relationship_prose=relationship_prose, + timeout_s=settings.classifier_timeout_s, + ) append_and_apply( conn, @@ -658,7 +683,7 @@ async def add_guest( # per-direction summary is set via the per-pov scene-close path # (T27), not direct edge_update. We therefore drop seed.*_summary # here; the deltas + knowledge_facts are what materializes. - if not _seed_is_default(seed): + if seed is not None and not _seed_is_default(seed): append_and_apply( conn, kind="edge_update", diff --git a/tests/test_drawer_guest.py b/tests/test_drawer_guest.py index a599c03..f0a14d6 100644 --- a/tests/test_drawer_guest.py +++ b/tests/test_drawer_guest.py @@ -265,6 +265,162 @@ def test_drawer_remove_guest_clears_and_closes_scene(client, tmp_path): assert kinds.index("scene_closed") < kinds.index("guest_removed") +# --- T72.2 first-meeting gate ---------------------------------------------- + + +def _seed_host_to_guest_edge(db: Path) -> None: + """Materialise a bot_a -> bot_b edge so the gate's check fires.""" + from chat.eventlog.log import append_and_apply + + with open_db(db) as conn: + append_and_apply( + conn, + kind="edge_update", + payload={ + "source_id": "bot_a", + "target_id": "bot_b", + "chat_id": "chat_bot_a", + "affinity_delta": 0, + "knowledge_facts": ["already met before"], + }, + ) + + +def test_add_guest_form_disables_prose_when_edge_exists(client, tmp_path): + """When host->candidate edge already exists, the GET partial renders + the textarea disabled and surfaces the "already know each other" + message so the user knows submitting will skip the seed. + """ + _seed_chat(tmp_path / "test.db") + _seed_host_to_guest_edge(tmp_path / "test.db") + + response = client.get("/chats/chat_bot_a/drawer") + assert response.status_code == 200 + body = response.text + # Note + disabled state both present. The textarea sits next to the + # ``add-guest-prose`` class so we can match it specifically. + assert "already know each other" in body + assert 'class="add-guest-prose"' in body + # The textarea for the first (auto-selected) candidate should be + # disabled in the initial markup since an edge exists. + assert "disabled" in body.split('class="add-guest-prose"', 1)[1].split(">", 1)[0] + # And the option carries the ``data-existing-edge="true"`` attribute + # the inline JS uses to flip state on subsequent select changes. + assert 'data-existing-edge="true"' in body + + +def test_add_guest_with_existing_edge_skips_seed_call(client, tmp_path): + """Submitting the Add-guest form WITHOUT toggling re-seed must skip + ``seed_inter_bot_edges`` entirely. We assert this via an empty mock + queue: if the seed function had been called it would have consumed + a canned response (or raised because none was available). + """ + _seed_chat(tmp_path / "test.db") + _seed_host_to_guest_edge(tmp_path / "test.db") + + # Empty queue: any classifier call would raise inside MockLLMClient. + canned_queue: list[str] = [] + _override_llm(canned_queue) + try: + response = client.post( + "/chats/chat_bot_a/drawer/guest/add", + data={ + "guest_bot_id": "bot_b", + "relationship_prose": "ignored prose", + # NO reseed flag — gate should suppress the seed call. + }, + ) + assert response.status_code == 200 + finally: + app.dependency_overrides.clear() + + with open_db(tmp_path / "test.db") as conn: + from chat.state.edges import get_edge + from chat.state.world import get_chat + + chat = get_chat(conn, "chat_bot_a") + assert chat["guest_bot_id"] == "bot_b" + + # The pre-seeded knowledge fact survives — proof the seed didn't run + # and overwrite the existing edge. + edge = get_edge(conn, "bot_a", "bot_b") + assert "already met before" in edge["knowledge"] + + # Exactly one guest_added; no new edge_update events between + # bot_a and bot_b (the pre-seed edge_update from the test setup + # is the only edge_update on this pair). + added = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'guest_added'" + ).fetchone()[0] + assert added == 1 + + edge_updates = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'edge_update'" + ).fetchall() + # Only the pre-seed edge_update from _seed_host_to_guest_edge. + ab_updates = [ + json.loads(p[0]) + for p in edge_updates + if { + json.loads(p[0]).get("source_id"), + json.loads(p[0]).get("target_id"), + } + == {"bot_a", "bot_b"} + ] + assert len(ab_updates) == 1 + assert ab_updates[0]["knowledge_facts"] == ["already met before"] + + +def test_add_guest_with_existing_edge_and_reseed_runs_seed(client, tmp_path): + """Toggling ``re-seed anyway`` flips the gate off — the existing + flow runs (seed produces deltas, two ``edge_update`` events fire). + """ + _seed_chat(tmp_path / "test.db") + _seed_host_to_guest_edge(tmp_path / "test.db") + + canned = json.dumps( + { + "a_to_b_summary": "reconnected", + "a_to_b_knowledge_facts": ["new fact"], + "a_to_b_affinity_delta": 2, + "a_to_b_trust_delta": 1, + "b_to_a_summary": "reconnected", + "b_to_a_knowledge_facts": [], + "b_to_a_affinity_delta": 1, + "b_to_a_trust_delta": 0, + } + ) + _override_llm([canned]) + try: + response = client.post( + "/chats/chat_bot_a/drawer/guest/add", + data={ + "guest_bot_id": "bot_b", + "relationship_prose": "fresh prose", + "reseed": "1", + }, + ) + assert response.status_code == 200 + finally: + app.dependency_overrides.clear() + + with open_db(tmp_path / "test.db") as conn: + edge_updates = conn.execute( + "SELECT payload_json FROM event_log WHERE kind = 'edge_update'" + ).fetchall() + # Pre-seed (1) + two from the re-seed = 3 edge_updates total. + ab_updates = [ + json.loads(p[0]) + for p in edge_updates + if { + json.loads(p[0]).get("source_id"), + json.loads(p[0]).get("target_id"), + } + == {"bot_a", "bot_b"} + ] + assert len(ab_updates) == 3 + + def test_drawer_with_guest_renders_guest_and_group_sections(client, tmp_path): _seed_chat(tmp_path / "test.db") from chat.eventlog.log import append_and_apply -- 2.52.0 From 607d0971c4565bd5cae7775a0b67095e950fc3ce Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:28:25 -0400 Subject: [PATCH 09/17] feat: drawer witness flag inline-edit (T72.3) Memories grow per-flag witness checkboxes (you / host / guest) that auto-submit on change via HTMX. The new POST route emits a manual_edit event with target_kind=memory_witness and a {flag, value} payload; prior_value mirrors the same shape so an inverse edit restores the flag. The drawer's recent-memories query now selects the three witness columns alongside the existing fields so the template can render checkbox state without a second query per row. --- chat/state/manual_edit.py | 18 +++++++ chat/templates/_drawer.html | 17 ++++++ chat/web/drawer.py | 73 ++++++++++++++++++++++++- tests/test_drawer_edits_extended.py | 82 +++++++++++++++++++++++++++-- 4 files changed, 185 insertions(+), 5 deletions(-) diff --git a/chat/state/manual_edit.py b/chat/state/manual_edit.py index 57e6c3b..3bfff79 100644 --- a/chat/state/manual_edit.py +++ b/chat/state/manual_edit.py @@ -24,6 +24,12 @@ T72.1 (Phase 2.5) adds one list-shaped edit: by string equality so the route handler doesn't have to track fact indices across re-renders. +T72.3 adds a per-flag witness toggle: +- ``memory_witness`` — flip one of ``witness_you`` / ``witness_host`` / + ``witness_guest`` on a memory row. Payload's ``new_value`` is a dict + ``{"flag": "you"|"host"|"guest", "value": 0|1}`` and ``prior_value`` + mirrors the same shape so an inverse edit can restore the flag. + Pin toggles intentionally use the existing ``memory_pin_changed`` event (registered in :mod:`chat.state.memory`) rather than ``manual_edit`` so the projection writes both ``pinned`` and ``auto_pinned`` atomically. @@ -37,6 +43,8 @@ from sqlite3 import Connection from chat.eventlog.log import Event from chat.eventlog.projector import on +_VALID_WITNESS_FLAGS = {"you", "host", "guest"} + def _clamp(value: int, lo: int, hi: int) -> int: return max(lo, min(hi, value)) @@ -120,5 +128,15 @@ def _apply_manual_edit(conn: Connection, e: Event) -> None: target_id["target_id"], ), ) + elif kind == "memory_witness": + # T72.3: toggle one of the three witness flags on a memory row. + # ``new_value`` is the dict ``{"flag", "value"}``; ``prior_value`` + # mirrors the same shape so an inverse edit restores the flag. + flag = new_value["flag"] + if flag in _VALID_WITNESS_FLAGS: + conn.execute( + f"UPDATE memories SET witness_{flag} = ? WHERE id = ?", + (1 if int(new_value["value"]) else 0, int(target_id)), + ) # Unknown target_kind: silently no-op for v1. Future kinds (activity # fields, etc.) extend the dispatch above. diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 228a440..2cf48a7 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -347,6 +347,23 @@ +
+ {% for flag in ['you', 'host', 'guest'] %} + {% set witnessed = m['witness_' ~ flag] %} +
+ + + + +
+ {% endfor %} +
Edit POV summary
Date: Sun, 26 Apr 2026 17:36:16 -0400 Subject: [PATCH 10/17] feat: regenerate broadcasts turn_html over SSE (T73.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the new assistant_turn lands, publish a `turn_html_replace` SSE event carrying the rendered HTML, the new turn_id, and the original assistant_turn id as `supersedes_id` so connected tabs can swap the prior DOM node in-place. Phase 1 T29 deferred this — page had to refresh to see the regenerated turn. Uses a new event name (not the existing `turn_html`) because the HTMX `sse-swap="turn_html"` consumer expects raw HTML and an *append* semantic; regenerate is a *replace*. The new event ships as JSON (supersedes_id forces sse.py's JSON branch) so the front-end JS can read the swap target from the payload. Test: `test_regenerate_broadcasts_turn_html_over_sse` patches the `publish` reference inside the regenerate module and asserts the event shape. --- chat/services/regenerate.py | 44 ++++++++++++++++++++ tests/test_regenerate.py | 82 +++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/chat/services/regenerate.py b/chat/services/regenerate.py index eff02a2..6c1abad 100644 --- a/chat/services/regenerate.py +++ b/chat/services/regenerate.py @@ -26,6 +26,7 @@ Phase 1 simplifications (per the plan's "bound it" guidance): so affinity/trust/knowledge reflect the new output. - The route does not broadcast a fresh ``turn_html`` SSE event; T34 polishes UI swaps. The user refreshes the page to see the new turn. + *(T73.1 closed this gap — see Phase 2.5 changes below.)* Phase 2 changes (T44): @@ -42,6 +43,27 @@ Phase 2 changes (T44): is not invoked here. If the prior turn fired an interjection it remains attached to the original assistant_turn (which is superseded alongside the regenerated turn) — Phase 2.5 will revisit. + +Phase 2.5 changes: + +- T73.1: After the new ``assistant_turn`` lands we publish a + ``turn_html_replace`` SSE event carrying the rendered HTML for the + regenerated turn plus the original assistant_turn's event_id as + ``supersedes_id`` so connected tabs can swap the prior DOM node + in-place. We use a NEW event name (rather than re-using ``turn_html``) + because the existing HTMX ``sse-swap="turn_html"`` consumer expects a + raw-HTML body and an *append* semantic; ``turn_html_replace`` is a + JSON payload (sse.py auto-serialises when extra keys accompany + ``data``) so the front-end JS can read ``supersedes_id`` and replace + the right node. +- T73.2: Interjection regeneration. When the original assistant_turn + group included an interjection beat we redo BOTH the primary and the + interjection — re-running ``detect_interjection`` against the new + primary text. If the classifier returns False this time we supersede + the original interjection without appending a replacement. +- T73.3: The defensive degrade-to-1:1 for stale ``guest_bot_id`` + references was removed — Phase 2 T47 fixed the root cause (resets + clear the reference) so the guard is dead code. """ from __future__ import annotations @@ -58,6 +80,7 @@ from chat.state.edges import get_edge from chat.state.entities import get_bot, get_you from chat.state.world import active_scene, get_chat from chat.web.pubsub import publish +from chat.web.render import render_turn_html async def regenerate_assistant_turn( @@ -238,6 +261,27 @@ async def regenerate_assistant_turn( (new_assistant_event_id, original_assistant_event_id), ) + # 7a. Broadcast a turn_html_replace SSE event so connected tabs can + # swap the prior assistant_turn DOM node in-place (T73.1, Phase 1.5 + # backlog #2). Uses a separate event name from post_turn's + # ``turn_html`` (which is append-only) because regenerate is a + # *replace* operation — see module docstring for the rationale. + speaker_name_for_render = ( + speaker_bot.get("name", "bot") if speaker_bot is not None else "bot" + ) + new_turn_html = render_turn_html( + speaker_name_for_render, new_text, role="bot" + ) + await publish( + chat_id, + { + "event": "turn_html_replace", + "data": new_turn_html, + "turn_id": new_assistant_event_id, + "supersedes_id": original_assistant_event_id, + }, + ) + # 8. Re-run downstream classifier passes (memory write + state update # for every directed pair across present entities). Significance is # intentionally skipped on regenerate (the prior score remains diff --git a/tests/test_regenerate.py b/tests/test_regenerate.py index b561d5f..f923dd5 100644 --- a/tests/test_regenerate.py +++ b/tests/test_regenerate.py @@ -271,3 +271,85 @@ def test_regenerate_404_when_assistant_turn_missing(client, tmp_path): assert response.status_code == 404 finally: app.dependency_overrides.clear() + + +def test_regenerate_broadcasts_turn_html_over_sse( + tmp_path, monkeypatch +): + """T73.1: regenerate publishes a ``turn_html_replace`` SSE event so + connected tabs swap the prior turn's DOM node in place. + + The event carries: + - ``data``: rendered HTML for the new turn + - ``turn_id``: event_id of the new assistant_turn + - ``supersedes_id``: event_id of the original assistant_turn + """ + import asyncio + + from chat.config import Settings + from chat.db.migrate import apply_migrations + from chat.services import regenerate as regenerate_module + from chat.services.regenerate import regenerate_assistant_turn + + db_path = tmp_path / "test.db" + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + monkeypatch.setenv("CHAT_DB_PATH", str(db_path)) + apply_migrations(db_path) + + ut_id, at_id = _seed_with_one_turn(db_path) + + published: list[tuple[str, dict]] = [] + + async def _capture(chat_id, event): + published.append((chat_id, event)) + + # Patch the imported reference inside the regenerate module so the + # service's call site goes through our spy. + monkeypatch.setattr(regenerate_module, "publish", _capture) + + narrative_canned = "Refreshed reply." + state_canned = json.dumps( + {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} + ) + canned = [narrative_canned, state_canned, state_canned] + mock_client = MockLLMClient(canned=list(canned)) + + settings = Settings(featherless_api_key="test") + + with open_db(db_path) as conn: + new_text = asyncio.run( + regenerate_assistant_turn( + conn, + mock_client, + settings=settings, + chat_id="chat_bot_a", + original_assistant_event_id=at_id, + ) + ) + assert new_text == narrative_canned + + # Find the new assistant_turn event_id for cross-checking. + cur = conn.execute( + "SELECT id FROM event_log " + "WHERE kind = 'assistant_turn' AND id != ? " + "AND superseded_by IS NULL", + (at_id,), + ).fetchone() + new_at_id = cur[0] + + # Filter out per-token publishes; we want the replace broadcast. + replace_calls = [ + ev for (_cid, ev) in published if ev.get("event") == "turn_html_replace" + ] + assert len(replace_calls) == 1 + payload = replace_calls[0] + assert payload["supersedes_id"] == at_id + assert payload["turn_id"] == new_at_id + # The HTML carries the new narrative text and the speaker name. + assert "Refreshed reply." in payload["data"] + assert "BotA" in payload["data"] + # Sanity: every publish targeted this chat. + for cid, _ev in published: + assert cid == "chat_bot_a" -- 2.52.0 From c874883a849aa2c5efadd317dec067b550f02f82 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:37:26 -0400 Subject: [PATCH 11/17] feat: classifier-based addressee detection (T74.1) Replace the substring _detect_addressee_id helper with a classifier call for the multi-entity case. The substring helper is kept as a fast-path for the no-guest case (no LLM round-trip needed when only one bot is present, preserves throughput). - New service chat/services/addressee.py wrapping the existing classifier wrapper. AddresseeDecision carries addressee_id + confidence + reason; classifier failure falls back to the host with reason="fallback" (graceful-degradation, matches the relationship_seed / interjection pattern). - chat/web/turns.py post_turn now calls detect_addressee in the multi-entity branch; 1:1 keeps the substring path. - tests/test_addressee.py: 3 new tests (guest pick, host pick, classifier-failure fallback). - tests/test_turn_flow.py: existing multi-entity tests now feed a canned addressee response in the queue. The addressee-routing test is updated to assert classifier-driven routing rather than substring. --- chat/services/addressee.py | 108 +++++++++++++++++++++++++++++++++++++ chat/web/turns.py | 22 +++++++- tests/test_addressee.py | 99 ++++++++++++++++++++++++++++++++++ tests/test_turn_flow.py | 79 ++++++++++++++++++--------- 4 files changed, 280 insertions(+), 28 deletions(-) create mode 100644 chat/services/addressee.py create mode 100644 tests/test_addressee.py diff --git a/chat/services/addressee.py b/chat/services/addressee.py new file mode 100644 index 0000000..e085d79 --- /dev/null +++ b/chat/services/addressee.py @@ -0,0 +1,108 @@ +"""Addressee classifier service (T74.1). + +Phase 2 (T44) detected the addressee — host vs. guest — with a simple +case-insensitive whole-word substring match against the bots' names. +That worked for the obvious case ("BotB, what do you think?") but lost +the long tail: pronouns, paraphrases, indirect address, narrative +focus on a particular party. T74.1 swaps the substring helper for a +classifier call that reads the prose holistically. + +The substring helper in :mod:`chat.web.turns` is kept as a fast-path +for the no-guest case (only one bot present means there is nothing to +classify) and as a non-breaking fallback for the regenerate path. The +multi-entity branch in :func:`chat.web.turns.post_turn` calls +:func:`detect_addressee` from this module. + +Failure mode: classifier flake or low-confidence response degrades to +the host (the default speaker per Phase 2's host-keeps-the-floor +bias). The decision carries ``confidence`` and ``reason`` so callers +that want to log degraded decisions can distinguish a real "host" call +from a fallback. +""" + +from __future__ import annotations + +from pydantic import BaseModel + +from chat.llm.classify import classify +from chat.llm.client import LLMClient + + +class AddresseeDecision(BaseModel): + """Which present bot the user is addressing. + + ``addressee_id`` is the chosen bot's id. ``confidence`` is one of + ``"high"`` / ``"medium"`` / ``"low"`` — callers may treat ``"low"`` + as a soft fallback to the host. ``reason`` is a short free-form + string. The classifier-failure fallback uses ``reason="fallback"`` + so it's distinguishable from a real low-confidence call. + """ + + addressee_id: str + confidence: str = "medium" # "high" | "medium" | "low" + reason: str = "" + + +_SYSTEM = ( + "Given a user's turn prose and the names of present bots, decide " + "which bot the user is addressing. If the user is speaking to no " + "specific bot (descriptive narration, action without dialogue), " + "default to the host. Output strict JSON matching the schema. " + "The addressee_id MUST be one of the ids supplied in the user " + "message — do not invent ids." +) + + +async def detect_addressee( + client: LLMClient, + *, + classifier_model: str, + user_prose: str, + host_id: str, + host_name: str, + guest_id: str | None, + guest_name: str | None, + timeout_s: float = 30.0, +) -> AddresseeDecision: + """Classify which present bot the user is addressing. + + Defaults to host on classifier failure or when the classifier picks + an id that isn't one of the supplied ids. The caller is expected to + only invoke this in the multi-entity case (a guest is present); + when no guest is present the substring fast-path in + :mod:`chat.web.turns` is used instead and this function is not + called. + """ + fallback = AddresseeDecision( + addressee_id=host_id, confidence="low", reason="fallback" + ) + user = ( + f"Host: {host_name} (id={host_id})\n" + + ( + f"Guest: {guest_name} (id={guest_id})\n" + if guest_id is not None + else "" + ) + + f"\nUser prose:\n{user_prose}" + ) + decision = await classify( + client, + model=classifier_model, + system=_SYSTEM, + user=user, + schema=AddresseeDecision, + default=fallback, + timeout_s=timeout_s, + ) + # Defensive: if the classifier returned an id outside the supplied + # set, treat it as a fallback to the host. This catches pathological + # outputs that pass schema validation but pick a phantom id. + valid_ids = {host_id} + if guest_id is not None: + valid_ids.add(guest_id) + if decision.addressee_id not in valid_ids: + return fallback + return decision + + +__all__ = ["AddresseeDecision", "detect_addressee"] diff --git a/chat/web/turns.py b/chat/web/turns.py index 940afbf..4309b8e 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -55,6 +55,7 @@ from fastapi import APIRouter, Depends, Form, HTTPException, Request from fastapi.responses import HTMLResponse, RedirectResponse, Response from chat.eventlog.log import append_and_apply, append_event +from chat.services.addressee import detect_addressee from chat.services.background import SignificanceJob from chat.services.interjection import detect_interjection from chat.services.memory_write import record_turn_memory_for_present @@ -262,8 +263,25 @@ async def post_turn( # 3. Determine the addressee. Done before assistant_turn_started so the # placeholder reflects the bot the user is actually talking to (host - # in 1:1, host-or-guest in multi-entity). - addressee_id = _detect_addressee_id(prose, host_bot, guest_bot) + # in 1:1, host-or-guest in multi-entity). T74.1 routes the multi-entity + # case through the addressee classifier; the no-guest case still uses + # the substring fast-path because there is nothing to classify when + # only one bot is present (and a classifier round-trip there would + # just be throughput overhead). + if guest_bot is None: + addressee_id = _detect_addressee_id(prose, host_bot, guest_bot) + else: + decision = await detect_addressee( + client, + classifier_model=settings.classifier_model, + user_prose=prose, + host_id=host_bot["id"], + host_name=host_bot["name"], + guest_id=guest_bot["id"], + guest_name=guest_bot["name"], + timeout_s=settings.classifier_timeout_s, + ) + addressee_id = decision.addressee_id addressee_bot = ( guest_bot if (guest_bot is not None and addressee_id == guest_bot["id"]) else host_bot diff --git a/tests/test_addressee.py b/tests/test_addressee.py new file mode 100644 index 0000000..71954cf --- /dev/null +++ b/tests/test_addressee.py @@ -0,0 +1,99 @@ +"""Addressee classifier service tests (T74.1). + +Covers :func:`chat.services.addressee.detect_addressee`: + +- Classifier picks the guest -> ``addressee_id == guest_id``. +- Classifier picks the host -> ``addressee_id == host_id``. +- Classifier flakes (3 bad-JSON responses, exhausting the built-in + retry budget in :func:`chat.llm.classify.classify`) -> fallback to + the host with ``reason="fallback"``. +""" + +from __future__ import annotations + +import json + +import pytest + +from chat.llm.mock import MockLLMClient +from chat.services.addressee import AddresseeDecision, detect_addressee + + +@pytest.mark.asyncio +async def test_classifier_picks_guest(): + """Classifier returns the guest id verbatim — caller propagates it.""" + canned = [ + json.dumps( + { + "addressee_id": "bot_b", + "confidence": "high", + "reason": "user named BotB", + } + ) + ] + client = MockLLMClient(canned=canned) + + result = await detect_addressee( + client, + classifier_model="test-model", + user_prose="BotB, what do you think?", + host_id="bot_a", + host_name="BotA", + guest_id="bot_b", + guest_name="BotB", + ) + + assert isinstance(result, AddresseeDecision) + assert result.addressee_id == "bot_b" + assert result.confidence == "high" + + +@pytest.mark.asyncio +async def test_classifier_picks_host(): + """Classifier returns the host id — caller propagates it.""" + canned = [ + json.dumps( + { + "addressee_id": "bot_a", + "confidence": "medium", + "reason": "narration aimed at host", + } + ) + ] + client = MockLLMClient(canned=canned) + + result = await detect_addressee( + client, + classifier_model="test-model", + user_prose="I lean back and stretch.", + host_id="bot_a", + host_name="BotA", + guest_id="bot_b", + guest_name="BotB", + ) + + assert result.addressee_id == "bot_a" + assert result.confidence == "medium" + + +@pytest.mark.asyncio +async def test_classifier_failure_falls_back_to_host(): + """Three bad-JSON responses exhaust the retry budget and the + classifier-failure fallback returns ``host_id`` with + ``reason="fallback"``.""" + canned = ["not json", "still not json", "garbage"] + client = MockLLMClient(canned=canned) + + result = await detect_addressee( + client, + classifier_model="test-model", + user_prose="anything", + host_id="bot_a", + host_name="BotA", + guest_id="bot_b", + guest_name="BotB", + ) + + assert result.addressee_id == "bot_a" + assert result.reason == "fallback" + assert result.confidence == "low" diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py index 7d04755..665dc0c 100644 --- a/tests/test_turn_flow.py +++ b/tests/test_turn_flow.py @@ -405,14 +405,15 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path): 1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates + 2 memory_written events. Single turn_html broadcast. - Canned queue (8 calls): + Canned queue (11 calls): 1. parse_turn - 2. narrative stream (primary, addressee = host because the prose + 2. detect_addressee (T74.1) -> host + 3. narrative stream (primary, addressee = host because the prose doesn't name the guest) - 3-8. 6 state-update calls (one per directed pair across {you, + 4-9. 6 state-update calls (one per directed pair across {you, bot_a, bot_b}) - 9. detect_interjection -> should_interject=False - 10. detect_scene_close -> should_close=False + 10. detect_interjection -> should_interject=False + 11. detect_scene_close -> should_close=False """ _seed_chat_with_guest(tmp_path / "test.db") canned_parse = json.dumps( @@ -420,6 +421,9 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path): ) canned = [ canned_parse, + json.dumps( + {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"} + ), "Greetings.", _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), @@ -474,14 +478,15 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path): 1 user_turn + 2 assistant_turns + (6 + 6) post-turn edge_updates + 4 memory_written events. - Canned queue (16 calls): + Canned queue (17 calls): 1. parse_turn - 2. narrative stream (primary) - 3-8. 6 state-update calls (post-primary) - 9. detect_interjection -> should_interject=True - 10. narrative stream (interjection) - 11-16. 6 state-update calls (post-interjection) - 17. detect_scene_close -> should_close=False + 2. detect_addressee (T74.1) -> host + 3. narrative stream (primary) + 4-9. 6 state-update calls (post-primary) + 10. detect_interjection -> should_interject=True + 11. narrative stream (interjection) + 12-17. 6 state-update calls (post-interjection) + 18. detect_scene_close -> should_close=False """ _seed_chat_with_guest(tmp_path / "test.db") canned_parse = json.dumps( @@ -489,6 +494,9 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path): ) canned = [ canned_parse, + json.dumps( + {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"} + ), "Primary beat.", _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), @@ -555,14 +563,15 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries( rewrites fire for both bots (memory.pov_summary changes for each). Interjection short-circuits at False so the queue stays compact. - Canned queue (12 calls): + Canned queue (13 calls): 1. parse_turn - 2. narrative stream (primary) - 3-8. 6 state-update calls - 9. detect_interjection -> False (no follow-on stream) - 10. detect_scene_close -> True - 11. apply_scene_close_summary host POV - 12. apply_scene_close_summary guest POV + 2. detect_addressee (T74.1) -> host + 3. narrative stream (primary) + 4-9. 6 state-update calls + 10. detect_interjection -> False (no follow-on stream) + 11. detect_scene_close -> True + 12. apply_scene_close_summary host POV + 13. apply_scene_close_summary guest POV """ _seed_chat_with_guest(tmp_path / "test.db") canned_parse = json.dumps( @@ -588,6 +597,9 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries( ) canned = [ canned_parse, + json.dumps( + {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"} + ), "Goodnight.", _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), @@ -639,12 +651,20 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries( def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path): - """Prose that names the guest by name routes the primary turn to the - guest. Interjection (when fired) makes the host the silent witness - and the second assistant_turn carries the host as speaker. + """T74.1: the multi-entity addressee call goes through the classifier; + when the classifier returns the guest, the primary turn routes there. + Interjection (when fired) makes the host the silent witness and the + second assistant_turn carries the host as speaker. - Canned queue: same shape as the with-interjection test (16 calls) - plus the trailing scene_close decision. + Canned queue (with classifier-led addressee = guest): + 1. parse_turn + 2. detect_addressee -> bot_b (the guest) + 3. narrative stream (primary, addressee = guest) + 4-9. 6 state-update calls + 10. detect_interjection -> True + 11. interjection narrative stream + 12-17. 6 state-update calls (post-interjection) + 18. detect_scene_close -> False """ _seed_chat_with_guest(tmp_path / "test.db") canned_parse = json.dumps( @@ -652,6 +672,13 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path): ) canned = [ canned_parse, + json.dumps( + { + "addressee_id": "bot_b", + "confidence": "high", + "reason": "user named BotB", + } + ), "BotB pondering.", _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), _zero_state(), @@ -680,8 +707,8 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path): primary_payload = json.loads(rows[0][0]) interjection_payload = json.loads(rows[1][0]) - # Primary speaker is the guest because the prose names BotB and not - # BotA (case-insensitive whole-word match). + # Primary speaker is the guest because the addressee classifier + # picked bot_b for the prose ("BotB, what do you think?"). assert primary_payload["speaker_id"] == "bot_b" # Interjection follow-on goes to the silent witness — the host. assert interjection_payload["speaker_id"] == "bot_a" -- 2.52.0 From 88fae3315229e5add1e93d28b38fbe71b74a5886 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:38:30 -0400 Subject: [PATCH 12/17] fix: enqueue significance for interjection memories (T74.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T44's interjection branch wrote interjection memories via record_turn_memory_for_present but never enqueued a SignificanceJob, so the interjection beat could land in memory but never be scored — which meant it could never auto-pin even when it carried a pivotal moment. - Capture the host-POV memory id from the interjection's memory write result and enqueue a SignificanceJob mirroring the primary turn's pattern. One enqueue per beat (host id; guest POV piggybacks on the same score since the prose is identical for v2 — per-POV rewrite happens at scene close in T45). - New test test_interjection_enqueues_significance_job pins the contract by intercepting worker.enqueue and asserting two distinct jobs land per 3-entity turn that fires an interjection. --- chat/web/turns.py | 29 ++++++++++++++++++- tests/test_turn_flow.py | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/chat/web/turns.py b/chat/web/turns.py index 4309b8e..ab1308c 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -616,7 +616,7 @@ async def post_turn( # Memory write for the interjection beat — a second pair # of memory_written events (host + guest POVs). - record_turn_memory_for_present( + interject_memory_results = record_turn_memory_for_present( conn, chat_id=chat_id, host_bot_id=host_bot["id"], @@ -626,6 +626,33 @@ async def post_turn( chat_clock_at=chat.get("time"), ) + # T74.2: enqueue a significance pass for the interjection + # memory. Mirrors the primary-turn enqueue pattern above — + # we score on the host's memory id since the prose is + # identical across both POVs (per-POV rewrite happens at + # scene close in T45). Without this enqueue the + # interjection beat lands in memory but never gets scored, + # so it can never auto-pin even when it carries a pivotal + # moment. + interject_host_event = interject_memory_results.get( + host_bot["id"] + ) + interject_host_memory_id = ( + interject_host_event[1] if interject_host_event else None + ) + if ( + worker is not None + and interject_host_memory_id is not None + ): + worker.enqueue( + SignificanceJob( + memory_id=interject_host_memory_id, + narrative_text=interjection_text, + prior_dialogue=recent_post_interject, + host_bot_id=host_bot["id"], + ) + ) + # 9. Scene-close detection (Plan §7.2, T26). Runs AFTER assistant_turn # and the optional interjection so the bots' responses are part of # the closing scene's final beat — closing before narrative would diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py index 665dc0c..281d123 100644 --- a/tests/test_turn_flow.py +++ b/tests/test_turn_flow.py @@ -713,3 +713,65 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path): # Interjection follow-on goes to the silent witness — the host. assert interjection_payload["speaker_id"] == "bot_a" assert interjection_payload["interjection_of"] == "bot_b" + + +def test_interjection_enqueues_significance_job(app_state_setup, tmp_path): + """T74.2: when an interjection fires, the interjection memory is + enqueued for significance scoring just like the primary memory. + + Capture enqueued ``SignificanceJob``s by replacing the background + worker's ``enqueue`` method with a list-append. Without T74.2, the + interjection memory would never be scored — only the primary's + enqueue would land. We therefore expect TWO jobs after a turn that + has both a primary and an interjection beat: one for the primary + memory, one for the interjection memory. + """ + _seed_chat_with_guest(tmp_path / "test.db") + canned_parse = json.dumps( + {"segments": [{"kind": "dialogue", "text": "tell me"}]} + ) + canned = [ + canned_parse, + json.dumps( + {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"} + ), + "Primary beat.", + _zero_state(), _zero_state(), _zero_state(), + _zero_state(), _zero_state(), _zero_state(), + json.dumps({"should_interject": True, "reason": "jealous"}), + "Interjection beat!", + _zero_state(), _zero_state(), _zero_state(), + _zero_state(), _zero_state(), _zero_state(), + json.dumps({"should_close": False, "reason": "no signal"}), + ] + _override_llm(canned) + + captured_jobs: list = [] + worker = app.state.background_worker + # Re-enable enqueue capture even though the worker's loop is disabled + # — we want to count enqueues without the loop running classifier work. + worker.enabled = True + original_enqueue = worker.enqueue + worker.enqueue = captured_jobs.append # type: ignore[assignment] + + try: + response = app_state_setup.post( + "/chats/chat_bot_a/turns", data={"prose": "tell me"} + ) + assert response.status_code == 204 + finally: + worker.enqueue = original_enqueue # type: ignore[assignment] + worker.enabled = False + app.dependency_overrides.clear() + + # Expect 2 enqueues: 1 for the primary memory + 1 for the + # interjection memory. + assert len(captured_jobs) == 2 + + # Both jobs should reference distinct memory ids — the primary's + # host-POV memory and the interjection's host-POV memory. + memory_ids = [job.memory_id for job in captured_jobs] + assert len(set(memory_ids)) == 2 + # The two narrative texts should be the two streamed beats. + narrative_texts = sorted(job.narrative_text for job in captured_jobs) + assert narrative_texts == ["Interjection beat!", "Primary beat."] -- 2.52.0 From f2a57005e59b3331cb85fa110b7b06e5be9fb38b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:39:31 -0400 Subject: [PATCH 13/17] feat: regenerate covers interjection turns (T73.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 T44 deferred interjection regenerate — when the original turn group included a follow-on interjection beat we left it untouched. Now regenerate redoes BOTH halves: - Detect a sibling interjection by looking up assistant_turn events pinned to the same user_turn_id with `interjection_of` set. - After streaming the new primary, run `detect_interjection` against the new primary text. - If True: stream a new interjection from the silent witness, append with `interjection_of=`, supersede the original interjection, and re-run memory + state-update for the new beat. - If False: supersede the original interjection without a replacement (back-pointer goes to the new primary so the row stays consistently hidden). Also broadcast a `turn_html_replace` event for the new interjection so the front-end can swap the prior interjection node in place (mirrors T73.1's primary swap). Tests: - `test_regenerate_with_interjection_redoes_both_turns`: classifier returns True; assert two new assistant_turns land for the same user_turn, second carries `interjection_of`, originals superseded. - `test_regenerate_drops_interjection_when_classifier_returns_false`: classifier returns False; assert one new assistant_turn (primary only) and the original interjection is superseded with no replacement. `interjection_of` carries the primary's *speaker_id* (matching the existing convention in chat/web/turns.py) rather than the event_id. --- chat/services/regenerate.py | 256 +++++++++++++++++++++++++++++ tests/test_regenerate.py | 309 ++++++++++++++++++++++++++++++++++++ 2 files changed, 565 insertions(+) diff --git a/chat/services/regenerate.py b/chat/services/regenerate.py index 6c1abad..c432d20 100644 --- a/chat/services/regenerate.py +++ b/chat/services/regenerate.py @@ -73,6 +73,7 @@ from sqlite3 import Connection from chat.config import Settings from chat.eventlog.log import append_and_apply, append_event +from chat.services.interjection import detect_interjection from chat.services.memory_write import record_turn_memory_for_present from chat.services.multi_state_update import compute_state_updates_for_present from chat.services.prompt import assemble_narrative_prompt @@ -131,6 +132,33 @@ async def regenerate_assistant_turn( raise ValueError("assistant_turn event not found") original_assistant_payload = json.loads(row[0]) original_user_turn_id = original_assistant_payload.get("user_turn_id") + + # 1a. Look up any sibling interjection beat in the same turn group + # (T73.2). The original group is (primary + optional interjection), + # both pinned to the same ``user_turn_id``. The interjection has a + # populated ``interjection_of`` field in its payload — its speaker is + # the silent witness (the bot that wasn't the primary addressee). + # Filter on ``superseded_by IS NULL`` so prior regenerates of this + # group don't reappear as siblings. + original_interjection_event_id: int | None = None + original_interjection_payload: dict | None = None + if original_user_turn_id is not None: + sibling_cur = conn.execute( + "SELECT id, payload_json FROM event_log " + "WHERE kind = 'assistant_turn' " + " AND id != ? " + " AND superseded_by IS NULL", + (original_assistant_event_id,), + ) + for sib_id, sib_payload_json in sibling_cur.fetchall(): + sib_payload = json.loads(sib_payload_json) + if sib_payload.get("user_turn_id") != original_user_turn_id: + continue + if not sib_payload.get("interjection_of"): + continue + original_interjection_event_id = sib_id + original_interjection_payload = sib_payload + break # Phase 2 v2 regenerates only the addressee turn — preserve whichever # bot the original turn was attributed to, falling back to the host # for legacy rows that pre-date multi-entity support. @@ -361,6 +389,234 @@ async def regenerate_assistant_turn( }, ) + # 9. Interjection regenerate branch (T73.2). When the original + # assistant_turn group included a follow-on interjection beat we need + # to revisit that beat against the regenerated primary. Three outcomes: + # + # - No original interjection: nothing to do; we already short-circuit + # above by leaving ``original_interjection_event_id`` as None. + # - Original interjection + classifier returns True: stream a fresh + # interjection from the silent witness, append it (with + # ``interjection_of`` linking to the new primary speaker), and + # supersede the original interjection's row. Also re-run memory + # + state-update so the second beat moves edges + writes memories. + # - Original interjection + classifier returns False: supersede the + # original interjection without appending a replacement. The + # regenerated group becomes "primary only" because the new primary + # no longer warrants a follow-on. No memory / state work needed + # for the absent beat. + # + # ``superseded_by`` on the original interjection's row points at the + # *new primary* in the no-replacement case (rather than NULL or a + # nonexistent id) so the row is consistently hidden by the standard + # ``superseded_by IS NULL`` timeline filter and the back-pointer + # leads somewhere meaningful for an "originally said …" affordance. + if original_interjection_event_id is not None and guest_bot is not None: + # Identify the silent witness from the original interjection's + # speaker_id (which is the bot that interjected last time). When + # we regenerate we keep the *same pair of present entities*, so + # the silent witness is whichever bot isn't the new primary + # speaker — derive it from present rather than reusing the prior + # speaker_id verbatim, in case the regenerated primary swapped + # who held the floor. + if speaker_bot_id == host_bot_id: + silent_witness = guest_bot + else: + silent_witness = host_bot + silent_witness_id = silent_witness.get("id") + + edge_w_to_addr = get_edge(conn, silent_witness_id, speaker_bot_id) or { + "affinity": 50, + "trust": 50, + "summary": "", + } + edge_w_to_you = get_edge(conn, silent_witness_id, "you") or { + "affinity": 50, + "trust": 50, + "summary": "", + } + + decision = await detect_interjection( + client, + classifier_model=settings.classifier_model, + addressee_name=speaker_bot.get("name", "bot"), + addressee_just_said=new_text, + silent_witness_name=silent_witness.get("name", "bot"), + silent_witness_persona=silent_witness.get("persona") or "", + silent_witness_edge_to_addressee=edge_w_to_addr, + silent_witness_edge_to_you=edge_w_to_you, + you_just_said=prose_for_prompt or "", + timeout_s=settings.classifier_timeout_s, + ) + + if decision.should_interject: + # Re-read recent so the just-appended primary is in the prompt. + interject_cur = conn.execute( + "SELECT id, kind, payload_json FROM event_log " + "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') " + " AND superseded_by IS NULL AND hidden = 0 " + "ORDER BY id DESC LIMIT 20", + ) + interject_rows = list(reversed(interject_cur.fetchall())) + interject_recent: list[dict] = [] + for _eid, kind, payload_json in interject_rows: + p = json.loads(payload_json) + if p.get("chat_id") != chat_id: + continue + if kind in ("user_turn", "user_turn_edit"): + interject_recent.append( + {"speaker": you_name, "text": p.get("prose", "")} + ) + else: + spk = p.get("speaker_id", "bot") + if spk == host_bot_id: + spk_name = host_bot.get("name", "bot") + elif spk == guest_bot.get("id"): + spk_name = guest_bot.get("name", "bot") + else: + spk_name = "bot" + interject_recent.append( + {"speaker": spk_name, "text": p.get("text", "")} + ) + if interject_recent and interject_recent[-1].get("speaker") == you_name: + interject_recent = interject_recent[:-1] + + interject_messages = assemble_narrative_prompt( + conn, + chat_id=chat_id, + speaker_bot_id=silent_witness_id, + addressee=speaker_bot_id, + user_turn_prose=prose_for_prompt or None, + recent_dialogue=interject_recent, + budget_soft=settings.narrative_budget_soft, + budget_hard=settings.narrative_budget_hard, + guest_id=guest_bot_id, + ) + + interject_accumulated: list[str] = [] + async for chunk in client.stream( + interject_messages, + model=settings.narrative_model, + max_tokens=settings.narrative_max_tokens, + temperature=settings.narrative_temperature, + ): + interject_accumulated.append(chunk) + await publish( + chat_id, + { + "event": "token", + "text": chunk, + "speaker_id": silent_witness_id, + }, + ) + interject_text = "".join(interject_accumulated) + + new_interjection_event_id = append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": chat_id, + "speaker_id": silent_witness_id, + "text": interject_text, + "truncated": False, + "user_turn_id": ( + new_user_event_id + if new_user_event_id is not None + else original_user_turn_id + ), + "regenerated_from": original_interjection_event_id, + "interjection_of": speaker_bot_id, + }, + ) + + # Supersede the original interjection by the new one. + conn.execute( + "UPDATE event_log SET superseded_by = ? WHERE id = ?", + (new_interjection_event_id, original_interjection_event_id), + ) + + # Broadcast a replace event so connected tabs swap the prior + # interjection node in-place (mirrors T73.1's primary swap). + interject_html = render_turn_html( + silent_witness.get("name", "bot"), interject_text, role="bot" + ) + await publish( + chat_id, + { + "event": "turn_html_replace", + "data": interject_html, + "turn_id": new_interjection_event_id, + "supersedes_id": original_interjection_event_id, + }, + ) + + # Memory write for the new interjection beat (one event per + # present witness). + record_turn_memory_for_present( + conn, + chat_id=chat_id, + host_bot_id=host_bot_id, + guest_bot_id=guest_bot_id, + narrative_text=interject_text, + scene_id=scene["id"] if scene else None, + chat_clock_at=chat.get("time"), + ) + + # Re-run the multi-pair state-update with the post-interjection + # dialogue tail so deltas land on the post-primary baseline. + recent_post_interject = recent_for_update + [ + { + "speaker": silent_witness.get("name", "bot"), + "text": interject_text, + } + ] + prior_edges_post: dict[tuple[str, str], dict] = {} + for src in present_ids: + for tgt in present_ids: + if src == tgt: + continue + edge = get_edge(conn, src, tgt) or { + "affinity": 50, + "trust": 50, + "summary": "", + } + prior_edges_post[(src, tgt)] = edge + + state_updates_post = await compute_state_updates_for_present( + client, + classifier_model=settings.classifier_model, + present_ids=present_ids, + present_names=present_names, + personas=personas, + prior_edges=prior_edges_post, + recent_dialogue=recent_post_interject, + timeout_s=settings.classifier_timeout_s, + ) + for src_id, tgt_id, update in state_updates_post: + append_and_apply( + conn, + kind="edge_update", + payload={ + "source_id": src_id, + "target_id": tgt_id, + "chat_id": chat_id, + "affinity_delta": update.affinity_delta, + "trust_delta": update.trust_delta, + "knowledge_facts": update.knowledge_facts, + "last_interaction_at": last_at, + "last_interaction_chat_id": chat_id, + }, + ) + else: + # Classifier said "no follow-on this time" — supersede the + # original interjection without a replacement. Point the + # back-pointer at the new primary so the row is consistently + # hidden by the standard timeline filter. + conn.execute( + "UPDATE event_log SET superseded_by = ? WHERE id = ?", + (new_assistant_event_id, original_interjection_event_id), + ) + return new_text diff --git a/tests/test_regenerate.py b/tests/test_regenerate.py index f923dd5..7fa22bc 100644 --- a/tests/test_regenerate.py +++ b/tests/test_regenerate.py @@ -273,6 +273,114 @@ def test_regenerate_404_when_assistant_turn_missing(client, tmp_path): app.dependency_overrides.clear() +def _seed_with_interjection_group(db_path): + """Seed a multi-entity scene with a (primary + interjection) group. + + Returns ``(user_turn_id, primary_at_id, interjection_at_id)``. + + The primary speaker is the host (bot_a); the silent witness who + interjected is the guest (bot_b). Mirrors the convention in + chat/web/turns.py — both assistant_turns share the same + ``user_turn_id`` and the interjection's payload carries + ``interjection_of=``. + """ + with open_db(db_path) as conn: + for bot_id, name, persona in ( + ("bot_a", "BotA", "thoughtful"), + ("bot_b", "BotB", "loud"), + ): + append_event( + conn, + kind="bot_authored", + payload={ + "id": bot_id, + "name": name, + "persona": persona, + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_multi", + "host_bot_id": "bot_a", + "guest_bot_id": "bot_b", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + for src, tgt in ( + ("bot_a", "you"), + ("you", "bot_a"), + ("bot_b", "you"), + ("you", "bot_b"), + ("bot_a", "bot_b"), + ("bot_b", "bot_a"), + ): + append_event( + conn, + kind="edge_update", + payload={ + "source_id": src, + "target_id": tgt, + "chat_id": "chat_multi", + }, + ) + for entity_id in ("you", "bot_a", "bot_b"): + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": entity_id, + "posture": "sitting", + "action": {"verb": "talking"}, + "attention": "", + "holding": [], + "status": {}, + }, + ) + ut_id = append_event( + conn, + kind="user_turn", + payload={ + "chat_id": "chat_multi", + "prose": "hello", + "segments": [], + }, + ) + primary_id = append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_multi", + "speaker_id": "bot_a", + "text": "Original primary.", + "truncated": False, + "user_turn_id": ut_id, + }, + ) + interjection_id = append_event( + conn, + kind="assistant_turn", + payload={ + "chat_id": "chat_multi", + "speaker_id": "bot_b", + "text": "Original interjection!", + "truncated": False, + "user_turn_id": ut_id, + "interjection_of": "bot_a", + }, + ) + project(conn) + return ut_id, primary_id, interjection_id + + def test_regenerate_broadcasts_turn_html_over_sse( tmp_path, monkeypatch ): @@ -353,3 +461,204 @@ def test_regenerate_broadcasts_turn_html_over_sse( # Sanity: every publish targeted this chat. for cid, _ev in published: assert cid == "chat_bot_a" + + +def test_regenerate_with_interjection_redoes_both_turns(tmp_path, monkeypatch): + """T73.2: when the original turn group included an interjection, both + the primary and the interjection are regenerated. + + Setup: 3-entity scene (host BotA + guest BotB + you) with a prior + (primary by BotA + interjection by BotB) group. Mock the + interjection classifier to return ``should_interject=True`` so the + follow-on regenerates too. + + Assert: 2 new assistant_turns exist for the same user_turn_id, the + second carrying ``interjection_of`` pointing at the new primary's + speaker_id. Both originals are superseded. + """ + import asyncio + + from chat.config import Settings + from chat.db.migrate import apply_migrations + from chat.services import regenerate as regenerate_module + from chat.services.interjection import InterjectionDecision + from chat.services.regenerate import regenerate_assistant_turn + + db_path = tmp_path / "test.db" + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + monkeypatch.setenv("CHAT_DB_PATH", str(db_path)) + apply_migrations(db_path) + + ut_id, primary_id, interjection_id = _seed_with_interjection_group(db_path) + + # Stub detect_interjection so the classifier "fires" with new prose. + async def _stub_should_interject(*_args, **_kwargs): + return InterjectionDecision(should_interject=True, reason="fired") + + monkeypatch.setattr( + regenerate_module, "detect_interjection", _stub_should_interject + ) + + # Canned queue: + # 1. New primary narrative stream. + # 2-7. Six state-update classifier calls (one per directed pair + # across host/you/guest = 6 pairs) for the primary pass. + # 8. New interjection narrative stream. + # 9-14. Six state-update classifier calls for the post-interjection + # pass. + state_canned = json.dumps( + {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} + ) + canned: list[str] = [] + canned.append("New primary text.") + canned.extend([state_canned] * 6) + canned.append("New interjection text!") + canned.extend([state_canned] * 6) + mock_client = MockLLMClient(canned=list(canned)) + + settings = Settings(featherless_api_key="test") + + with open_db(db_path) as conn: + new_text = asyncio.run( + regenerate_assistant_turn( + conn, + mock_client, + settings=settings, + chat_id="chat_multi", + original_assistant_event_id=primary_id, + ) + ) + assert new_text == "New primary text." + + # Both originals are superseded. + primary_super = conn.execute( + "SELECT superseded_by FROM event_log WHERE id = ?", (primary_id,) + ).fetchone()[0] + interjection_super = conn.execute( + "SELECT superseded_by FROM event_log WHERE id = ?", + (interjection_id,), + ).fetchone()[0] + assert primary_super is not None + assert interjection_super is not None + + # Two NEW assistant_turn events exist (the regenerated primary + # and the regenerated interjection), both pinned to the same + # user_turn_id as the originals. + cur = conn.execute( + "SELECT id, payload_json FROM event_log " + "WHERE kind = 'assistant_turn' AND id NOT IN (?, ?) " + "ORDER BY id", + (primary_id, interjection_id), + ).fetchall() + assert len(cur) == 2 + new_primary_id, new_primary_payload_json = cur[0] + new_interjection_id, new_interjection_payload_json = cur[1] + new_primary_payload = json.loads(new_primary_payload_json) + new_interjection_payload = json.loads(new_interjection_payload_json) + + assert new_primary_payload["text"] == "New primary text." + assert new_primary_payload["speaker_id"] == "bot_a" + assert new_primary_payload["user_turn_id"] == ut_id + assert new_primary_payload["regenerated_from"] == primary_id + assert "interjection_of" not in new_primary_payload + + assert new_interjection_payload["text"] == "New interjection text!" + assert new_interjection_payload["speaker_id"] == "bot_b" + assert new_interjection_payload["user_turn_id"] == ut_id + assert new_interjection_payload["regenerated_from"] == interjection_id + # interjection_of links to the new primary's speaker (matches + # the existing convention in chat/web/turns.py). + assert new_interjection_payload["interjection_of"] == "bot_a" + + # The originals' supersede pointers reach the new ones. + assert primary_super == new_primary_id + assert interjection_super == new_interjection_id + + +def test_regenerate_drops_interjection_when_classifier_returns_false( + tmp_path, monkeypatch +): + """T73.2: when the original group included an interjection but the + classifier returns False this time, the new group is primary-only. + + The original interjection is still superseded (we don't leave it + visible in the timeline alongside a regenerated primary it no longer + follows from), but no replacement assistant_turn is appended. + """ + import asyncio + + from chat.config import Settings + from chat.db.migrate import apply_migrations + from chat.services import regenerate as regenerate_module + from chat.services.interjection import InterjectionDecision + from chat.services.regenerate import regenerate_assistant_turn + + db_path = tmp_path / "test.db" + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + monkeypatch.setenv("CHAT_DB_PATH", str(db_path)) + apply_migrations(db_path) + + ut_id, primary_id, interjection_id = _seed_with_interjection_group(db_path) + + async def _stub_no_interject(*_args, **_kwargs): + return InterjectionDecision( + should_interject=False, reason="quiet" + ) + + monkeypatch.setattr( + regenerate_module, "detect_interjection", _stub_no_interject + ) + + # Canned queue: primary narrative + 6 state-update calls. No + # interjection stream because the classifier short-circuits. + state_canned = json.dumps( + {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} + ) + canned: list[str] = ["New primary text."] + [state_canned] * 6 + mock_client = MockLLMClient(canned=list(canned)) + + settings = Settings(featherless_api_key="test") + + with open_db(db_path) as conn: + new_text = asyncio.run( + regenerate_assistant_turn( + conn, + mock_client, + settings=settings, + chat_id="chat_multi", + original_assistant_event_id=primary_id, + ) + ) + assert new_text == "New primary text." + + # Original primary superseded by the new primary. + primary_super = conn.execute( + "SELECT superseded_by FROM event_log WHERE id = ?", (primary_id,) + ).fetchone()[0] + # Original interjection ALSO superseded — we don't leave a + # dangling beat attached to a regenerated primary that no longer + # warrants a follow-on. Back-pointer goes to the new primary. + interjection_super = conn.execute( + "SELECT superseded_by FROM event_log WHERE id = ?", + (interjection_id,), + ).fetchone()[0] + assert primary_super is not None + assert interjection_super is not None + assert interjection_super == primary_super # both point at new primary + + # Exactly ONE new assistant_turn — the primary; no replacement + # interjection. + cur = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'assistant_turn' AND id NOT IN (?, ?) " + "AND superseded_by IS NULL", + (primary_id, interjection_id), + ).fetchall() + assert len(cur) == 1 + new_primary_payload = json.loads(cur[0][0]) + assert new_primary_payload["text"] == "New primary text." + assert "interjection_of" not in new_primary_payload -- 2.52.0 From bd13b64959bdf801c0037b61fd16a737b02da320 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:40:07 -0400 Subject: [PATCH 14/17] chore: remove defensive stale-guest degrade in regenerate.py (T73.3) Phase 2 T44 added a defensive degrade-to-1:1 here when `chat.guest_bot_id` pointed at a deleted bot. T47 fixed the root cause: `bot_reset` cascade-clears the column when the referenced bot is purged (verified by tests/test_reset.py), so the guard was dead code. No corresponding stale-guest test existed in tests/test_regenerate.py to remove. The bot_reset cascade test in tests/test_reset.py already covers the root-cause behavior. --- chat/services/regenerate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/chat/services/regenerate.py b/chat/services/regenerate.py index c432d20..6427092 100644 --- a/chat/services/regenerate.py +++ b/chat/services/regenerate.py @@ -114,13 +114,13 @@ async def regenerate_assistant_turn( # Phase 2: surface the guest (if any) so the prompt assembler and # downstream multi-entity passes see the same shape post_turn does. + # Phase 2 T47 made bot_reset cascade-clear ``chat.guest_bot_id`` when + # the referenced bot is purged (verified by tests/test_reset.py), so + # we trust the column here: it's either a valid bot id or NULL. guest_bot_id = chat.get("guest_bot_id") - guest_bot: dict | None = None - if guest_bot_id is not None: - guest_bot = get_bot(conn, guest_bot_id) - if guest_bot is None: - # Stale guest reference — degrade to single-bot regenerate. - guest_bot_id = None + guest_bot: dict | None = ( + get_bot(conn, guest_bot_id) if guest_bot_id is not None else None + ) # 1. Locate the original assistant_turn event. row = conn.execute( -- 2.52.0 From bfb2ffb6f6ca21b091620256e9f675495e7939b0 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:40:12 -0400 Subject: [PATCH 15/17] chore: pin scene-close-on-cancel behavior + comment rationale (T74.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 T44 review noted that scene close still runs when a primary turn is cancelled mid-stream and asked the implementer to review. Review finding: the existing behavior is correct, not a bug. The close-detection branch in post_turn consumes ONLY the user's prose (fully appended to the event_log BEFORE streaming starts) and the current container name. It does NOT consume the bot's output. A user who types "we're done here, fade out" and then hits Stop mid-stream still meant to close — the cancelled bot beat doesn't invalidate that intent. - Document the rationale with an inline comment near the close-detection branch in chat/web/turns.py. - Add regression test test_cancelled_turn_still_closes_scene_when_user_prose_signals_close that drives a stream raising CancelledError on first iteration and asserts the scene_closed event still lands. --- chat/web/turns.py | 9 +++ tests/test_turn_flow.py | 121 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/chat/web/turns.py b/chat/web/turns.py index ab1308c..48882f0 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -668,6 +668,15 @@ async def post_turn( # close in the same chat) — we have nothing to close. T13 (kickoff) # is the only scene-opener path in v1; Phase 2-3 will handle # automatic re-opening with the next container. + # + # T74.3: this branch deliberately runs even when ``cancelled`` is + # True. Close detection consumes only the user's prose (which is + # fully appended to the event_log BEFORE streaming starts) and the + # current container name; it does NOT consume the bot's output. + # A user who types "we're done here, fade out" and then hits Stop + # mid-stream still meant to close the scene — the cancelled bot + # beat doesn't invalidate that intent. Pinned by + # test_cancelled_turn_still_closes_scene_when_user_prose_signals_close. if scene is not None and prose.strip(): container = None if scene.get("container_id") is not None: diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py index 281d123..a30ec24 100644 --- a/tests/test_turn_flow.py +++ b/tests/test_turn_flow.py @@ -715,6 +715,127 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path): assert interjection_payload["interjection_of"] == "bot_b" +def test_cancelled_turn_still_closes_scene_when_user_prose_signals_close( + app_state_setup, tmp_path +): + """T74.3 regression: a cancelled primary stream still triggers scene + close when the user prose carries a hard close signal. + + Rationale (also documented in turns.py near the close-detection + branch): close detection only consumes the user's prose, which is + fully appended to the event_log BEFORE streaming starts. The + cancelled bot beat doesn't invalidate the user's intent to close. + + Implementation: install a MockLLMClient whose ``stream`` raises + CancelledError on the first iteration. The classifier calls (parse, + addressee, scene_close, per-POV summaries) are still served from + the canned queue. The post_turn route ultimately re-raises + CancelledError after recording the partial — TestClient surfaces + that as an exception, so we drive the request inside ``with + pytest.raises``. Despite the exception, the scene_closed event + must land in the event_log. + """ + from typing import AsyncIterator, Sequence + + _seed_chat_with_guest(tmp_path / "test.db") + canned_parse = json.dumps( + {"segments": [{"kind": "narration", "text": "we are done here, fade out"}]} + ) + pov_payload = json.dumps( + { + "summary": "BotA noticed the day winding down.", + "knowledge_facts": [], + "relationship_summary": "warmer", + } + ) + pov_payload_guest = json.dumps( + { + "summary": "BotB watched the scene close.", + "knowledge_facts": [], + "relationship_summary": "warmer", + } + ) + # Canned queue: parse + addressee + 6 state-updates + + # scene_close=True + 2 per-POV summaries. NO interjection slot + # because the cancel path short-circuits the interjection branch. + canned = [ + canned_parse, + json.dumps( + {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"} + ), + # NOTE: no narrative slot — the stream is hijacked below to + # raise CancelledError on first iteration; it never pulls a + # canned response. + _zero_state(), _zero_state(), _zero_state(), + _zero_state(), _zero_state(), _zero_state(), + json.dumps({"should_close": True, "reason": "fade out signaled"}), + pov_payload, + pov_payload_guest, + ] + + class _CancelOnStreamMock: + """Mock LLM client that serves ``generate`` from a canned queue + and raises CancelledError on the FIRST iteration of ``stream``. + + Mirrors :class:`chat.llm.mock.MockLLMClient` for ``generate`` but + diverges on ``stream`` to simulate a mid-stream cancel. + """ + + def __init__(self, canned: list[str]) -> None: + self._canned = list(canned) + + async def generate( + self, messages: Sequence, *, model: str, **params + ) -> str: + return self._canned.pop(0) + + async def stream( + self, messages: Sequence, *, model: str, **params + ) -> AsyncIterator[str]: + # Yield a CancelledError on first iteration to simulate the + # /turns/cancel route firing mid-stream. + raise asyncio.CancelledError + yield # pragma: no cover — keeps this an async generator. + + from chat.web.kickoff import get_llm_client + + mock = _CancelOnStreamMock(canned=list(canned)) + app.dependency_overrides[get_llm_client] = lambda: mock + + try: + # FastAPI/Starlette handles the re-raised CancelledError as an + # internal failure — TestClient surfaces it as a 500 response. + # We don't assert on the status here; the regression is whether + # the scene_closed event still landed in the event_log. + try: + app_state_setup.post( + "/chats/chat_bot_a/turns", + data={"prose": "we are done here, fade out"}, + ) + except BaseException: + # Some Starlette/asyncio versions propagate the + # CancelledError out of the test client; that's fine — the + # partial-record + scene-close still ran before the raise. + pass + finally: + app.dependency_overrides.clear() + + with open_db(tmp_path / "test.db") as conn: + scene_close_count = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'" + ).fetchone()[0] + assistant_payload = conn.execute( + "SELECT payload_json FROM event_log " + "WHERE kind = 'assistant_turn' ORDER BY id" + ).fetchall() + + # Scene close lands despite the cancel. + assert scene_close_count == 1 + # The cancelled assistant_turn was still recorded (truncated=True). + assert len(assistant_payload) == 1 + assert json.loads(assistant_payload[0][0])["truncated"] is True + + def test_interjection_enqueues_significance_job(app_state_setup, tmp_path): """T74.2: when an interjection fires, the interjection memory is enqueued for significance scoring just like the primary memory. -- 2.52.0 From 6d98728a2eb1146d3fb907fb188584a1d2fabefc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:40:46 -0400 Subject: [PATCH 16/17] chore: remove defensive stale-guest degrade in turns.py (T74.4) T44 carried a defensive degrade-to-1:1 block in post_turn for the case where chat.guest_bot_id pointed at a deleted bot. T47 then fixed the root cause by adding a bot_reset cascade that clears guest_bot_id from any chat that referenced the deleted bot, so the post_turn defensive block was rendered dead. Remove the orphan-clear branch and replace it with a comment documenting that get_bot now returns a real row when guest_bot_id is non-None. The cascade behavior is pinned by test_reset_clears_guest_reference_in_other_chats in tests/test_reset.py. --- chat/web/turns.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/chat/web/turns.py b/chat/web/turns.py index 48882f0..b3a4f0e 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -236,11 +236,12 @@ async def post_turn( guest_bot = None guest_bot_id = chat.get("guest_bot_id") if guest_bot_id is not None: + # T47's bot_reset cascade clears guest_bot_id from any chat that + # referenced the deleted bot, so by the time we read it here it's + # either None or a live bot id. The previous defensive + # degrade-to-1:1 block (T44) was rendered dead by T47 and removed + # in T74.4 — get_bot now returns a real row. guest_bot = get_bot(conn, guest_bot_id) - # If the chat references a deleted guest we degrade to single-bot - # rather than 404 — the chat is still usable as a 1:1. - if guest_bot is None: - guest_bot_id = None settings = request.app.state.settings -- 2.52.0 From c6e0130e5921bd2231cc89dc5f3cbc3582c71eb7 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:45:30 -0400 Subject: [PATCH 17/17] docs: phase 2.5 status, prune shipped backlog items, capture phase 2.6 follow-ups (T75) --- CLAUDE.md | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f8ae596..ac3373c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -174,13 +174,7 @@ Deferred to Phase 2: second bot, group node, scene configurations, witness filte ### Phase 1.5 cleanup backlog -Small follow-ups identified during Phase 1 reviews. Pick up at any time; none are blocking. - -- **`open_db` refactor.** `chat/web/bots.py:get_conn()` duplicates the context-manager body to add `check_same_thread=False`. Extend `open_db(path, *, check_same_thread=True)` and have `get_conn` call it directly — eliminates the duplicated PRAGMA setup and ensures any future PRAGMA tweak only happens in one place. -- **Regenerate broadcasts `turn_html` over SSE.** Currently a refresh is needed (see T29 limitation above). Mirror the broadcast logic from `chat/web/turns.py:post_turn` after the new `assistant_turn` lands. -- **`bot_reset` purges orphaned "you" activity rows** (see limitation above). Either delete `activity` rows by chat-membership or accept the noise indefinitely; the projection-layer fix is one extra `DELETE FROM activity WHERE entity_id='you' AND container_id IN (SELECT id FROM containers WHERE chat_id IN (...))` clause inside `_apply_bot_reset`. -- **Drawer edits for the deferred v1 fields**: edge_trust slider, edge_summary textarea, memory pov_summary textarea, knowledge_facts add/remove. The `manual_edit` projector already supports `edge_trust` / `edge_summary` / `memory_pov_summary` target_kinds — only the routes are missing. Knowledge_facts needs a new dispatch branch. -- **NICE trim order in prompt assembly** drops previous-scene first instead of last (T18 review). Greedy-cuts heuristic vs spec listing order; revisit if v1 play surfaces a real regression. +All items shipped — see Phase 2.5 status below. ## Phase 2 status @@ -194,15 +188,28 @@ Phase 2 shipped end-to-end across **13 tasks** (T36–T48 wave). The multi-entit ### Phase 2.5 / 3 backlog -Carry-overs from Phase 2 reviews and implementer notes. None are blocking; pick up at any time. +All items shipped — see Phase 2.5 status below. -- **Interjection regenerate**: regenerate currently only acts on the addressee turn. Phase 2.5 should extend regenerate to cover the interjection turn too. -- **Classifier-based addressee detection**: substring match is brittle (e.g., names that are common English words, or names appearing inside a quoted aside). A small classifier call could disambiguate. -- **LLM-merged group meta-summary**: current `group_node.summary` is a naive concat of host + guest per-POV summaries. Phase 2.5 should polish with an LLM-merged group view. -- **First-meeting gate**: the drawer's "have they met?" textarea fires every time. Phase 2.5 should check whether the host→guest edge already exists and offer a "they already know each other" toggle to skip re-seeding. -- **Witness flag editing**: drawer doesn't allow editing memory witness flags (read-only). Phase 2.5+ may expose this. -- **Significance for interjection memories**: the interjection's `memory_written` event doesn't enqueue a `SignificanceJob` (per the T44 implementer note). Phase 2.5 should wire this in so interjection memories are scored alongside primary turns. -- **Stale guest reference defensive degrade in `post_turn`**: T44 added a degrade-to-1:1 when `chat.guest_bot_id` points at a deleted bot. T47 fixes the root cause (resets clear the reference); the degrade can probably be removed but is harmless. -- **Scene close on cancel**: scene close runs even when the primary turn is cancelled. Behavior may be intentional but could be argued either way; revisit if it surfaces a real UX regression. -- **Dual `ACTIVITIES:` block**: T43's prompt assembly adds a second `ACTIVITIES:` block for guest activity. Cleaner would be a single block with three bullets and per-bullet trim. -- **Witness role hardcoded in prompt assembly**: `chat/services/prompt.py:436` hardcodes `witness_role="host"` regardless of which bot is speaking. Phase 2.5 should derive the role from chat membership (e.g. `"host" if speaker_bot_id == chat.host_bot_id else "guest"`) so guest-as-speaker prompts retrieve the right memory slice. Test contract pinned in `tests/test_witness_filter_multi.py`. +## Phase 2.5 status + +Phase 2.5 cleanup shipped end-to-end across 8 tasks (T68–T75). Two CLAUDE.md backlogs (Phase 1.5 cleanup, Phase 2.5/3) are now empty; deferred follow-ups discovered during execution are tracked in a new "Phase 2.6 / 3 backlog" section below. + +- **`open_db` with check_same_thread parameter (T68)**: refactored `chat/db/connection.py` so `chat/web/bots.py:get_conn` no longer duplicates the PRAGMA setup. Default behavior preserved. +- **`bot_reset` cross-chat cleanup (T69)**: now purges orphaned "you" activity rows. Note: this also fixed a latent FK constraint crash that was lurking in the projector — `activity.container_id` is FK-referenced and the prior code would have crashed on any reset of a bot whose chat had a non-NULL `container_id` "you" activity row. The bug was masked because no prior test seeded such a row. +- **LLM-merged group meta-summary (T70)**: replaces Phase 2 T45's naive concat with a classifier merge call. Falls back to the naive concat on classifier failure. +- **`prompt.py` polish (T71)**: witness role parametric (`host` vs `guest` derived from chat membership); single `ACTIVITIES:` block with bullet-level trim; NICE trim order kept with documented rationale (greedy cheapest-impact-first beats spec-listing order in practice). +- **Drawer polish (T72)**: deferred v1 edits (edge_trust slider, edge_summary textarea, memory pov_summary textarea, knowledge_facts add/remove) + first-meeting gate (Add-guest form disables prose textarea when host→guest edge already exists; "re-seed anyway" toggle re-enables) + witness flag inline-edit (per-memory checkboxes for [you, host, guest] flags). Two new `manual_edit` projector branches: `edge_knowledge_fact` and `memory_witness`. +- **Regenerate polish (T73)**: regenerate now broadcasts `turn_html_replace` over SSE (NEW event distinct from `turn_html` to avoid breaking the existing append-semantic consumer); regenerate covers interjection turns (re-detects + re-streams or supersedes); defensive stale-guest degrade removed. +- **Turn-flow polish + addressee service (T74)**: classifier-based addressee detection (substring helper kept as no-guest fast path); SignificanceJob enqueued for interjection memories; scene-close-on-cancel pinned with comment + regression test (close detection is genuinely user-prose-only); defensive stale-guest degrade removed. + +### Phase 2.6 / 3 backlog + +New follow-ups discovered during Phase 2.5 execution. None are blocking; pick up at any time. + +- **Frontend handler for `turn_html_replace` SSE event (from T73.1 review)**: regenerate's backend broadcast lands, but no live tab swaps the regenerated turn until a JS handler is wired. The existing `turn_html` event uses HTMX `sse-swap` to append; `turn_html_replace` ships JSON with `supersedes_id` for replacement semantics. Phase 2.6 should wire the JS to swap the prior turn's DOM node in place. +- **Cancel/stop hook for in-flight regenerate streams (from T73 review)**: `post_turn` registers stream tasks in `_in_flight_tasks` so the user can stop them. Regenerate doesn't. A user clicking "Stop" mid-regenerate has no cancel hook today. +- **DRY: regenerate vs post_turn (from T73 review)**: recent-dialogue assembly and prior-edges block are duplicated between `chat/services/regenerate.py` and `chat/web/turns.py`. Extract to shared helpers analogous to `_gather_state_update_inputs`. +- **Sibling-discovery query optimization (from T73 review)**: `regenerate.py`'s sibling-assistant-turn lookup scans all non-superseded `assistant_turn` rows globally. Adding a `chat_id` predicate via JSON extraction (or a denormalized column) bounds the cost to per-chat scale. +- **`_witness_role_for` defensive coding (from T71 review)**: helper returns `"guest"` when `host_bot_id is None`, which is wrong for Phase-1 chats. Defensive: `return "host" if host_bot_id is None or speaker_bot_id == host_bot_id else "guest"`. Not exercised by current tests; harden as a precaution. +- **Confidence type tightening (from T74 review)**: `chat/services/addressee.py::AddresseeDecision.confidence` could be typed as `Literal["high","medium","low"]` for stricter validation. Currently `str` with a comment. +- **Scene-close-on-cancel UX revisit**: T74.3 pinned the existing behavior (close fires even on cancel). If real play-testing surfaces a regression, revisit. -- 2.52.0