From 13c23fd8981e777099c5c9dc55bcbf1dfd59c12e Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 17:07:12 -0400 Subject: [PATCH] feat: LLM-merged group meta-summary (T70) --- chat/services/scene_summarize.py | 82 +++++++++++++-- tests/test_per_pov_summary.py | 175 ++++++++++++++++++++++++++++++- 2 files changed, 244 insertions(+), 13 deletions(-) diff --git a/chat/services/scene_summarize.py b/chat/services/scene_summarize.py index d57f7e2..2e74ddf 100644 --- a/chat/services/scene_summarize.py +++ b/chat/services/scene_summarize.py @@ -334,26 +334,92 @@ async def apply_scene_close_summary( timeout_s=timeout_s, ) - # Group node update: naive per-POV concat for v2. Only fires when - # both POVs ran (i.e. the guest is present) and a group_node row - # exists for this chat. + # Group node update: T70 runs a third classifier call to merge the + # two per-POV summaries into a coherent group-level view + a brief + # group-dynamic note. Falls back to the Phase 2 naive concat on + # classifier failure (see :func:`merge_group_summary`). Only fires + # when both POVs ran (i.e. the guest is present) and a group_node + # row exists for this chat. 
if guest_pov is not None and get_group_node(conn, chat_id) is not None: host_bot = get_bot(conn, host_bot_id) or {"name": host_bot_id} guest_bot = get_bot(conn, guest_bot_id) or {"name": guest_bot_id} host_name = host_bot.get("name", host_bot_id) or host_bot_id guest_name = guest_bot.get("name", guest_bot_id) or guest_bot_id - group_summary = ( - f"{host_name}: {host_pov.summary}\n\n" - f"{guest_name}: {guest_pov.summary}" + merged = await merge_group_summary( + client, + classifier_model=classifier_model, + host_name=host_name, + host_pov_summary=host_pov.summary, + guest_name=guest_name, + guest_pov_summary=guest_pov.summary, + timeout_s=timeout_s, ) append_and_apply( conn, kind="group_node_updated", payload={ "chat_id": chat_id, - "summary": group_summary, - "dynamic": "", + "summary": merged.summary, + "dynamic": merged.dynamic, }, ) return host_pov + + +class GroupMetaSummary(BaseModel): + """Classifier output: a merged group-level view of a closed scene. + + Defaults are an empty no-op so callers can use the schema's default + as a sentinel; in practice :func:`merge_group_summary` builds an + explicit naive-concat fallback rather than returning these defaults + directly so existing Phase 2 behavior is preserved on classifier + failure. + """ + + summary: str = "" + dynamic: str = "" + + +_GROUP_MERGE_SYSTEM = ( + "Given two per-POV scene summaries from a 3-entity scene (you + " + "host + guest), produce a coherent group-level summary capturing " + "the shared events as both witnesses experienced them, plus a " + "brief 'dynamic' note describing the trio's group dynamic during " + "the scene. Output strict JSON matching schema." +) + + +async def merge_group_summary( + client: LLMClient, + *, + classifier_model: str, + host_name: str, + host_pov_summary: str, + guest_name: str, + guest_pov_summary: str, + timeout_s: float = 30.0, +) -> GroupMetaSummary: + """Merge two per-POV scene summaries into a coherent group-level + summary + group-dynamic note. 
Falls back to the naive concat (the + existing behavior) on classifier failure.""" + user = ( + f"{host_name} (host) POV summary:\n{host_pov_summary}\n\n" + f"{guest_name} (guest) POV summary:\n{guest_pov_summary}" + ) + fallback = GroupMetaSummary( + summary=( + f"{host_name}: {host_pov_summary}\n\n" + f"{guest_name}: {guest_pov_summary}" + ), + dynamic="", + ) + return await classify( + client, + model=classifier_model, + system=_GROUP_MERGE_SYSTEM, + user=user, + schema=GroupMetaSummary, + default=fallback, + timeout_s=timeout_s, + ) diff --git a/tests/test_per_pov_summary.py b/tests/test_per_pov_summary.py index f33345a..c401ea8 100644 --- a/tests/test_per_pov_summary.py +++ b/tests/test_per_pov_summary.py @@ -636,8 +636,10 @@ async def test_close_with_guest_updates_both_edges(tmp_path): @pytest.mark.asyncio async def test_close_with_group_node_updates_group_summary(tmp_path): """When a group_node row exists, scene close emits group_node_updated - with a non-empty summary that mentions both bots' names (v2 naive - concat of per-POV summaries).""" + with a non-empty summary that mentions both bots' names. T70 swapped + the Phase 2 naive concat for an LLM-merged summary; this regression + test feeds bad-JSON merge responses so the helper falls back to the + original naive-concat shape, preserving the original assertions.""" db = tmp_path / "t.db" apply_migrations(db) import chat.state.group_node # noqa: F401 -- register handlers @@ -660,7 +662,11 @@ async def test_close_with_group_node_updates_group_summary(tmp_path): _seed_two_bot_scene(conn, with_group_node=True) project(conn) - client = MockLLMClient(canned=[host_canned, guest_canned]) + # 2 valid (host POV, guest POV) + 3 bad-JSON merge attempts -> + # merge_group_summary falls back to the naive concat default. 
+ client = MockLLMClient( + canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"] + ) await apply_scene_close_summary( conn, client, @@ -675,8 +681,167 @@ async def test_close_with_group_node_updates_group_summary(tmp_path): gn = get_group_node(conn, "chat_bot_a") assert gn is not None assert gn["summary"] # non-empty - # Naive concat surfaces both bot names in the group summary. + # Naive-concat fallback surfaces both bot names in the group summary. assert "BotA" in gn["summary"] assert "BotB" in gn["summary"] - # Phase 2 v2 keeps dynamic empty (Phase 3 polishes). + # Naive-concat fallback keeps dynamic empty. assert gn["dynamic"] == "" + + +# --------------------------------------------------------------------------- +# T70: LLM-merged group meta-summary on scene close. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_group_summary_merges_per_pov_via_classifier_when_guest_present( + tmp_path, +): + """With a guest present and a group_node row, scene close runs the + merge classifier as a third call after the two per-POV summarize_scene + calls; its output drives the group_node summary + dynamic fields.""" + db = tmp_path / "t.db" + apply_migrations(db) + import chat.state.group_node # noqa: F401 -- register handlers + + host_canned = json.dumps( + { + "summary": "BotA appreciated the calm.", + "knowledge_facts": [], + "relationship_summary": "BotA felt steady.", + } + ) + guest_canned = json.dumps( + { + "summary": "BotB found the room friendly.", + "knowledge_facts": [], + "relationship_summary": "BotB warmed up.", + } + ) + merge_canned = json.dumps( + {"summary": "merged group view", "dynamic": "warm rapport"} + ) + with open_db(db) as conn: + _seed_two_bot_scene(conn, with_group_node=True) + project(conn) + + # Canned-queue layout matches the production call order in + # apply_scene_close_summary: host POV summarize_scene runs first, + # then guest POV summarize_scene, then 
merge_group_summary. + client = MockLLMClient( + canned=[host_canned, guest_canned, merge_canned] + ) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + # All three canned entries consumed -> classifier ran exactly 3x. + assert client._canned == [] + + from chat.state.group_node import get_group_node + + gn = get_group_node(conn, "chat_bot_a") + assert gn is not None + assert gn["summary"] == "merged group view" + assert gn["dynamic"] == "warm rapport" + + +@pytest.mark.asyncio +async def test_group_summary_falls_back_to_naive_concat_on_classifier_failure( + tmp_path, +): + """If the merge classifier flaps (bad JSON across all 3 retries), the + helper falls back to the original Phase 2 naive concat shape and + leaves dynamic empty.""" + db = tmp_path / "t.db" + apply_migrations(db) + import chat.state.group_node # noqa: F401 -- register handlers + + host_canned = json.dumps( + { + "summary": "BotA appreciated the calm.", + "knowledge_facts": [], + "relationship_summary": "BotA felt steady.", + } + ) + guest_canned = json.dumps( + { + "summary": "BotB found the room friendly.", + "knowledge_facts": [], + "relationship_summary": "BotB warmed up.", + } + ) + with open_db(db) as conn: + _seed_two_bot_scene(conn, with_group_node=True) + project(conn) + + # 2 valid POV summaries + 3 bad-JSON merge attempts trip the + # classifier's retry-then-default path; the default is the naive + # concat fallback. + client = MockLLMClient( + canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"] + ) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + from chat.state.group_node import get_group_node + + gn = get_group_node(conn, "chat_bot_a") + assert gn is not None + expected = ( + "BotA: BotA appreciated the calm.\n\n" + "BotB: BotB found the room friendly." 
+ ) + assert gn["summary"] == expected + assert gn["dynamic"] == "" + + +@pytest.mark.asyncio +async def test_group_summary_skipped_when_no_guest(tmp_path): + """No-guest path: scene close does NOT invoke merge_group_summary + and emits no group_node_updated event. Confirms the existing + `guest_pov is not None` gating at the call site.""" + db = tmp_path / "t.db" + apply_migrations(db) + canned = json.dumps( + { + "summary": "BotA helped you talk through the deadline anxiety.", + "knowledge_facts": ["Deadline next Friday."], + "relationship_summary": "BotA leaned in supportively.", + } + ) + with open_db(db) as conn: + _seed_single_bot_scene(conn) + project(conn) + + # Only 1 canned entry; if merge_group_summary were called the + # MockLLMClient would IndexError on the empty queue. + client = MockLLMClient(canned=[canned]) + await apply_scene_close_summary( + conn, + client, + classifier_model="x", + chat_id="chat_bot_a", + scene_id=1, + host_bot_id="bot_a", + ) + + # Exactly the host POV call consumed, nothing else. + assert client._canned == [] + + # No group_node_updated event was emitted. + rows = conn.execute( + "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'" + ).fetchall() + assert rows == []