feat: LLM-merged group meta-summary (T70)

This commit is contained in:
Joseph Doherty
2026-04-26 17:07:12 -04:00
parent e05f28e9d5
commit 13c23fd898
2 changed files with 244 additions and 13 deletions
+74 -8
View File
@@ -334,26 +334,92 @@ async def apply_scene_close_summary(
timeout_s=timeout_s,
)
# Group node update: naive per-POV concat for v2. Only fires when
# both POVs ran (i.e. the guest is present) and a group_node row
# exists for this chat.
# Group node update: T70 runs a third classifier call to merge the
# two per-POV summaries into a coherent group-level view + a brief
# group-dynamic note. Falls back to the Phase 2 naive concat on
# classifier failure (see :func:`merge_group_summary`). Only fires
# when both POVs ran (i.e. the guest is present) and a group_node
# row exists for this chat.
if guest_pov is not None and get_group_node(conn, chat_id) is not None:
host_bot = get_bot(conn, host_bot_id) or {"name": host_bot_id}
guest_bot = get_bot(conn, guest_bot_id) or {"name": guest_bot_id}
host_name = host_bot.get("name", host_bot_id) or host_bot_id
guest_name = guest_bot.get("name", guest_bot_id) or guest_bot_id
group_summary = (
f"{host_name}: {host_pov.summary}\n\n"
f"{guest_name}: {guest_pov.summary}"
merged = await merge_group_summary(
client,
classifier_model=classifier_model,
host_name=host_name,
host_pov_summary=host_pov.summary,
guest_name=guest_name,
guest_pov_summary=guest_pov.summary,
timeout_s=timeout_s,
)
append_and_apply(
conn,
kind="group_node_updated",
payload={
"chat_id": chat_id,
"summary": group_summary,
"dynamic": "",
"summary": merged.summary,
"dynamic": merged.dynamic,
},
)
return host_pov
class GroupMetaSummary(BaseModel):
    """Classifier output: a merged group-level view of a closed scene.

    Defaults are an empty no-op so callers can use the schema's default
    as a sentinel; in practice :func:`merge_group_summary` builds an
    explicit naive-concat fallback rather than returning these defaults
    directly so existing Phase 2 behavior is preserved on classifier
    failure.
    """

    # Group-level narrative of the scene. Defaults to the empty string, so
    # a payload that omits the key still validates.
    summary: str = ""
    # Short note on the group dynamic during the scene. Defaults to the
    # empty string, which is also what the naive-concat fallback uses.
    dynamic: str = ""
# System prompt for the group-merge classifier call. The fragments below are
# joined by implicit string concatenation, so each one carries its own
# trailing space; the resulting prompt is a single line of text.
_GROUP_MERGE_SYSTEM = (
    "Given two per-POV scene summaries from a 3-entity scene "
    "(you + host + guest), "
    "produce a coherent group-level summary capturing the shared events "
    "as both witnesses experienced them, "
    "plus a brief 'dynamic' note describing the trio's group dynamic "
    "during the scene. "
    "Output strict JSON matching schema."
)
async def merge_group_summary(
    client: LLMClient,
    *,
    classifier_model: str,
    host_name: str,
    host_pov_summary: str,
    guest_name: str,
    guest_pov_summary: str,
    timeout_s: float = 30.0,
) -> GroupMetaSummary:
    """Merge two per-POV scene summaries into one group-level summary.

    Issues a single ``classify`` call with the host and guest summaries
    and asks for a :class:`GroupMetaSummary` (merged ``summary`` plus a
    brief ``dynamic`` note).

    Args:
        client: LLM client handed through to ``classify``.
        classifier_model: Model identifier handed through to ``classify``.
        host_name: Display name used to label the host's summary.
        host_pov_summary: Scene summary from the host's point of view.
        guest_name: Display name used to label the guest's summary.
        guest_pov_summary: Scene summary from the guest's point of view.
        timeout_s: Timeout in seconds handed through to ``classify``.

    Returns:
        The classifier's merged result when it carries a non-blank
        summary. Otherwise the naive concat
        ``"<host>: <summary>\\n\\n<guest>: <summary>"`` with an empty
        ``dynamic``; that same value is passed to ``classify`` as its
        ``default``.
    """
    user = (
        f"{host_name} (host) POV summary:\n{host_pov_summary}\n\n"
        f"{guest_name} (guest) POV summary:\n{guest_pov_summary}"
    )
    fallback = GroupMetaSummary(
        summary=(
            f"{host_name}: {host_pov_summary}\n\n"
            f"{guest_name}: {guest_pov_summary}"
        ),
        dynamic="",
    )
    merged = await classify(
        client,
        model=classifier_model,
        system=_GROUP_MERGE_SYSTEM,
        user=user,
        schema=GroupMetaSummary,
        default=fallback,
        timeout_s=timeout_s,
    )
    # Every GroupMetaSummary field has a default, so a reply such as ``{}``
    # or ``{"summary": ""}`` validates yet carries no summary. Persisting
    # that would blank the group node, so treat it like a classifier
    # failure and keep the naive concat instead.
    if not merged.summary.strip():
        return fallback
    return merged
+170 -5
View File
@@ -636,8 +636,10 @@ async def test_close_with_guest_updates_both_edges(tmp_path):
@pytest.mark.asyncio
async def test_close_with_group_node_updates_group_summary(tmp_path):
"""When a group_node row exists, scene close emits group_node_updated
with a non-empty summary that mentions both bots' names (v2 naive
concat of per-POV summaries)."""
with a non-empty summary that mentions both bots' names. T70 swapped
the Phase 2 naive concat for an LLM-merged summary; this regression
test feeds bad-JSON merge responses so the helper falls back to the
original naive-concat shape, preserving the original assertions."""
db = tmp_path / "t.db"
apply_migrations(db)
import chat.state.group_node # noqa: F401 -- register handlers
@@ -660,7 +662,11 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
_seed_two_bot_scene(conn, with_group_node=True)
project(conn)
client = MockLLMClient(canned=[host_canned, guest_canned])
# 2 valid (host POV, guest POV) + 3 bad-JSON merge attempts ->
# merge_group_summary falls back to the naive concat default.
client = MockLLMClient(
canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"]
)
await apply_scene_close_summary(
conn,
client,
@@ -675,8 +681,167 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
gn = get_group_node(conn, "chat_bot_a")
assert gn is not None
assert gn["summary"] # non-empty
# Naive concat surfaces both bot names in the group summary.
# Naive-concat fallback surfaces both bot names in the group summary.
assert "BotA" in gn["summary"]
assert "BotB" in gn["summary"]
# Phase 2 v2 keeps dynamic empty (Phase 3 polishes).
# Naive-concat fallback keeps dynamic empty.
assert gn["dynamic"] == ""
# ---------------------------------------------------------------------------
# T70: LLM-merged group meta-summary on scene close.
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_group_summary_merges_per_pov_via_classifier_when_guest_present(
    tmp_path,
):
    """Happy path: the merge classifier's reply is what lands on the group node.

    The scene is seeded with a guest and a group_node row. Scene close
    then consumes three canned replies (host POV, guest POV, merge), and
    the merge reply's ``summary`` / ``dynamic`` are stored verbatim.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)
    # Importing the module is what registers the group_node handlers; the
    # accessor is bound here too so it is ready for the assertions below.
    from chat.state.group_node import get_group_node

    # Queue order mirrors the production call order in
    # apply_scene_close_summary: host POV, guest POV, then the merge call.
    canned_replies = [
        json.dumps(
            {
                "summary": "BotA appreciated the calm.",
                "knowledge_facts": [],
                "relationship_summary": "BotA felt steady.",
            }
        ),
        json.dumps(
            {
                "summary": "BotB found the room friendly.",
                "knowledge_facts": [],
                "relationship_summary": "BotB warmed up.",
            }
        ),
        json.dumps({"summary": "merged group view", "dynamic": "warm rapport"}),
    ]

    with open_db(db_path) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        client = MockLLMClient(canned=canned_replies)
        await apply_scene_close_summary(
            conn,
            client,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        # An empty queue means all three canned replies were consumed.
        assert client._canned == []

        group_node = get_group_node(conn, "chat_bot_a")
        assert group_node is not None
        assert group_node["summary"] == "merged group view"
        assert group_node["dynamic"] == "warm rapport"
@pytest.mark.asyncio
async def test_group_summary_falls_back_to_naive_concat_on_classifier_failure(
    tmp_path,
):
    """Failure path: unparseable merge replies yield the naive concat.

    After the two valid per-POV replies, the merge call is fed three
    bad-JSON replies. The stored group summary must then be the
    ``"<name>: <pov summary>"`` concat of both POVs, with an empty
    ``dynamic``.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)
    # Importing the module is what registers the group_node handlers; the
    # accessor is bound here too so it is ready for the assertions below.
    from chat.state.group_node import get_group_node

    host_reply = json.dumps(
        {
            "summary": "BotA appreciated the calm.",
            "knowledge_facts": [],
            "relationship_summary": "BotA felt steady.",
        }
    )
    guest_reply = json.dumps(
        {
            "summary": "BotB found the room friendly.",
            "knowledge_facts": [],
            "relationship_summary": "BotB warmed up.",
        }
    )
    # Three bad-JSON entries exhaust the merge call's attempts, so the
    # helper's naive-concat default is what gets applied.
    bad_merge_replies = ["bad1", "bad2", "bad3"]

    with open_db(db_path) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        client = MockLLMClient(
            canned=[host_reply, guest_reply, *bad_merge_replies]
        )
        await apply_scene_close_summary(
            conn,
            client,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        group_node = get_group_node(conn, "chat_bot_a")
        assert group_node is not None

        naive_concat = "\n\n".join(
            [
                "BotA: BotA appreciated the calm.",
                "BotB: BotB found the room friendly.",
            ]
        )
        assert group_node["summary"] == naive_concat
        assert group_node["dynamic"] == ""
@pytest.mark.asyncio
async def test_group_summary_skipped_when_no_guest(tmp_path):
    """No-guest path: scene close makes no merge call and emits no event.

    With a single-bot scene, only the host POV reply is consumed and the
    event log holds no ``group_node_updated`` row, confirming the
    guest-presence gating at the call site.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    host_reply = json.dumps(
        {
            "summary": "BotA helped you talk through the deadline anxiety.",
            "knowledge_facts": ["Deadline next Friday."],
            "relationship_summary": "BotA leaned in supportively.",
        }
    )

    with open_db(db_path) as conn:
        _seed_single_bot_scene(conn)
        project(conn)

        # A single canned reply: per the original author's note, an extra
        # merge call would make MockLLMClient raise IndexError on the
        # exhausted queue.
        client = MockLLMClient(canned=[host_reply])
        await apply_scene_close_summary(
            conn,
            client,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        # The host POV call drained the queue and nothing else ran.
        assert client._canned == []

        # The event log must not contain any group_node_updated row.
        group_events = conn.execute(
            "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'"
        ).fetchall()
        assert group_events == []