merge: T70 LLM-merged group meta-summary

This commit is contained in:
Joseph Doherty
2026-04-26 17:09:16 -04:00
2 changed files with 244 additions and 13 deletions
+170 -5
View File
@@ -636,8 +636,10 @@ async def test_close_with_guest_updates_both_edges(tmp_path):
@pytest.mark.asyncio
async def test_close_with_group_node_updates_group_summary(tmp_path):
"""When a group_node row exists, scene close emits group_node_updated
with a non-empty summary that mentions both bots' names (v2 naive
concat of per-POV summaries)."""
with a non-empty summary that mentions both bots' names. T70 swapped
the Phase 2 naive concat for an LLM-merged summary; this regression
test feeds bad-JSON merge responses so the helper falls back to the
original naive-concat shape, preserving the original assertions."""
db = tmp_path / "t.db"
apply_migrations(db)
import chat.state.group_node # noqa: F401 -- register handlers
@@ -660,7 +662,11 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
_seed_two_bot_scene(conn, with_group_node=True)
project(conn)
client = MockLLMClient(canned=[host_canned, guest_canned])
# 2 valid (host POV, guest POV) + 3 bad-JSON merge attempts ->
# merge_group_summary falls back to the naive concat default.
client = MockLLMClient(
canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"]
)
await apply_scene_close_summary(
conn,
client,
@@ -675,8 +681,167 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
gn = get_group_node(conn, "chat_bot_a")
assert gn is not None
assert gn["summary"] # non-empty
# Naive concat surfaces both bot names in the group summary.
# Naive-concat fallback surfaces both bot names in the group summary.
assert "BotA" in gn["summary"]
assert "BotB" in gn["summary"]
# Phase 2 v2 keeps dynamic empty (Phase 3 polishes).
# Naive-concat fallback keeps dynamic empty.
assert gn["dynamic"] == ""
# ---------------------------------------------------------------------------
# T70: LLM-merged group meta-summary on scene close.
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_group_summary_merges_per_pov_via_classifier_when_guest_present(
    tmp_path,
):
    """Scene close with a guest and a group_node row stores the merged view.

    Three canned responses are queued: the host POV summary, the guest POV
    summary, and the merge output. After ``apply_scene_close_summary`` the
    queue must be fully drained and the group_node row must carry the merge
    response's ``summary`` and ``dynamic`` values.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)
    import chat.state.group_node  # noqa: F401 -- register handlers

    host_pov = {
        "summary": "BotA appreciated the calm.",
        "knowledge_facts": [],
        "relationship_summary": "BotA felt steady.",
    }
    guest_pov = {
        "summary": "BotB found the room friendly.",
        "knowledge_facts": [],
        "relationship_summary": "BotB warmed up.",
    }
    merged_view = {"summary": "merged group view", "dynamic": "warm rapport"}

    # Queue order mirrors the call order this test expects: host POV,
    # guest POV, then the merge response.
    responses = [
        json.dumps(payload) for payload in (host_pov, guest_pov, merged_view)
    ]
    close_kwargs = {
        "classifier_model": "x",
        "chat_id": "chat_bot_a",
        "scene_id": 1,
        "host_bot_id": "bot_a",
    }

    with open_db(db) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        client = MockLLMClient(canned=responses)
        await apply_scene_close_summary(conn, client, **close_kwargs)

        # An empty queue means every canned response was consumed.
        assert client._canned == []

        from chat.state.group_node import get_group_node

        group_row = get_group_node(conn, "chat_bot_a")
        assert group_row is not None
        assert group_row["summary"] == "merged group view"
        assert group_row["dynamic"] == "warm rapport"
@pytest.mark.asyncio
async def test_group_summary_falls_back_to_naive_concat_on_classifier_failure(
    tmp_path,
):
    """Unparseable merge responses leave the naive-concat group summary.

    The queue holds two valid per-POV summaries followed by three strings
    that are not JSON. The stored group summary must then be the
    ``"<Bot>: <summary>"`` entries joined by a blank line, with ``dynamic``
    left as the empty string.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)
    import chat.state.group_node  # noqa: F401 -- register handlers

    host_pov = {
        "summary": "BotA appreciated the calm.",
        "knowledge_facts": [],
        "relationship_summary": "BotA felt steady.",
    }
    guest_pov = {
        "summary": "BotB found the room friendly.",
        "knowledge_facts": [],
        "relationship_summary": "BotB warmed up.",
    }

    # Two good POV payloads, then three bad merge attempts so the merge
    # step never receives parseable JSON.
    responses = [json.dumps(host_pov), json.dumps(guest_pov)]
    responses += ["bad1", "bad2", "bad3"]

    naive_concat = "\n\n".join(
        [
            "BotA: BotA appreciated the calm.",
            "BotB: BotB found the room friendly.",
        ]
    )

    with open_db(db) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        client = MockLLMClient(canned=responses)
        await apply_scene_close_summary(
            conn,
            client,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        from chat.state.group_node import get_group_node

        group_row = get_group_node(conn, "chat_bot_a")
        assert group_row is not None
        assert group_row["summary"] == naive_concat
        assert group_row["dynamic"] == ""
@pytest.mark.asyncio
async def test_group_summary_skipped_when_no_guest(tmp_path):
    """Single-bot scene close consumes one response and logs no group update.

    Only the host POV response is queued. After the close, the queue must
    be empty and ``event_log`` must contain no ``group_node_updated`` rows.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    host_pov = {
        "summary": "BotA helped you talk through the deadline anxiety.",
        "knowledge_facts": ["Deadline next Friday."],
        "relationship_summary": "BotA leaned in supportively.",
    }
    close_kwargs = {
        "classifier_model": "x",
        "chat_id": "chat_bot_a",
        "scene_id": 1,
        "host_bot_id": "bot_a",
    }

    with open_db(db) as conn:
        _seed_single_bot_scene(conn)
        project(conn)

        # A single queued response: any additional classifier call would
        # find nothing left to consume.
        client = MockLLMClient(canned=[json.dumps(host_pov)])
        await apply_scene_close_summary(conn, client, **close_kwargs)

        # The host POV call drained the queue; nothing else was queued.
        assert client._canned == []

        # The event log holds no group_node_updated entries.
        cursor = conn.execute(
            "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'"
        )
        group_events = cursor.fetchall()
        assert group_events == []