merge: T70 LLM-merged group meta-summary
This commit is contained in:
@@ -334,26 +334,92 @@ async def apply_scene_close_summary(
|
||||
timeout_s=timeout_s,
|
||||
)
|
||||
|
||||
# Group node update: naive per-POV concat for v2. Only fires when
|
||||
# both POVs ran (i.e. the guest is present) and a group_node row
|
||||
# exists for this chat.
|
||||
# Group node update: T70 runs a third classifier call to merge the
|
||||
# two per-POV summaries into a coherent group-level view + a brief
|
||||
# group-dynamic note. Falls back to the Phase 2 naive concat on
|
||||
# classifier failure (see :func:`merge_group_summary`). Only fires
|
||||
# when both POVs ran (i.e. the guest is present) and a group_node
|
||||
# row exists for this chat.
|
||||
if guest_pov is not None and get_group_node(conn, chat_id) is not None:
|
||||
host_bot = get_bot(conn, host_bot_id) or {"name": host_bot_id}
|
||||
guest_bot = get_bot(conn, guest_bot_id) or {"name": guest_bot_id}
|
||||
host_name = host_bot.get("name", host_bot_id) or host_bot_id
|
||||
guest_name = guest_bot.get("name", guest_bot_id) or guest_bot_id
|
||||
group_summary = (
|
||||
f"{host_name}: {host_pov.summary}\n\n"
|
||||
f"{guest_name}: {guest_pov.summary}"
|
||||
merged = await merge_group_summary(
|
||||
client,
|
||||
classifier_model=classifier_model,
|
||||
host_name=host_name,
|
||||
host_pov_summary=host_pov.summary,
|
||||
guest_name=guest_name,
|
||||
guest_pov_summary=guest_pov.summary,
|
||||
timeout_s=timeout_s,
|
||||
)
|
||||
append_and_apply(
|
||||
conn,
|
||||
kind="group_node_updated",
|
||||
payload={
|
||||
"chat_id": chat_id,
|
||||
"summary": group_summary,
|
||||
"dynamic": "",
|
||||
"summary": merged.summary,
|
||||
"dynamic": merged.dynamic,
|
||||
},
|
||||
)
|
||||
|
||||
return host_pov
|
||||
|
||||
|
||||
class GroupMetaSummary(BaseModel):
    """Structured classifier result: one group-level view of a closed scene.

    Both fields default to the empty string, so a bare ``GroupMetaSummary()``
    is an empty no-op value that can act as a sentinel. The merge helper
    (:func:`merge_group_summary`) does not hand these empty defaults to the
    classifier call; it builds an explicit naive-concat instance as the
    fallback so the Phase 2 behavior is kept when the classifier fails.
    """

    # Merged, group-level narrative of the scene.
    summary: str = ""
    # Brief note on the group dynamic during the scene.
    dynamic: str = ""
|
||||
|
||||
|
||||
# System prompt for the group-merge classifier call. The fragments are joined
# with single spaces into one instruction string.
_GROUP_MERGE_SYSTEM = " ".join(
    (
        "Given two per-POV scene summaries from a 3-entity scene",
        "(you + host + guest), produce a coherent group-level summary",
        "capturing the shared events as both witnesses experienced them,",
        "plus a brief 'dynamic' note describing the trio's group dynamic",
        "during the scene.",
        "Output strict JSON matching schema.",
    )
)
|
||||
|
||||
|
||||
async def merge_group_summary(
    client: LLMClient,
    *,
    classifier_model: str,
    host_name: str,
    host_pov_summary: str,
    guest_name: str,
    guest_pov_summary: str,
    timeout_s: float = 30.0,
) -> GroupMetaSummary:
    """Merge two per-POV scene summaries into one group-level summary.

    Runs one classifier call that is asked for a coherent group-level
    summary plus a brief group-dynamic note.

    Args:
        client: LLM client forwarded to ``classify``.
        classifier_model: Model identifier forwarded to ``classify``.
        host_name: Display name of the host bot, used to label its summary.
        host_pov_summary: Scene summary from the host's point of view.
        guest_name: Display name of the guest bot, used to label its summary.
        guest_pov_summary: Scene summary from the guest's point of view.
        timeout_s: Timeout in seconds forwarded to ``classify``.

    Returns:
        The classifier's merged result when it carries a non-blank summary.
        Otherwise the summary is the naive concat
        ``"<host>: <host summary>\\n\\n<guest>: <guest summary>"``. The
        ``dynamic`` field is empty when the naive-concat default itself is
        returned.
    """
    user = (
        f"{host_name} (host) POV summary:\n{host_pov_summary}\n\n"
        f"{guest_name} (guest) POV summary:\n{guest_pov_summary}"
    )
    # Phase 2 shape: each POV summary labelled with its bot's name.
    naive_summary = (
        f"{host_name}: {host_pov_summary}\n\n"
        f"{guest_name}: {guest_pov_summary}"
    )
    # Passed as ``default``; ``classify`` is expected to return it when the
    # classifier call fails (per the original contract -- its implementation
    # is not visible from this block).
    fallback = GroupMetaSummary(summary=naive_summary, dynamic="")
    merged = await classify(
        client,
        model=classifier_model,
        system=_GROUP_MERGE_SYSTEM,
        user=user,
        schema=GroupMetaSummary,
        default=fallback,
        timeout_s=timeout_s,
    )
    # Every schema field has a default, so a reply such as ``{}`` validates
    # yet carries no summary. Returning it would make the caller overwrite
    # the group node summary with an empty string, so substitute the naive
    # concat and keep whatever dynamic note was produced.
    if not merged.summary.strip():
        return GroupMetaSummary(summary=naive_summary, dynamic=merged.dynamic)
    return merged
|
||||
|
||||
@@ -636,8 +636,10 @@ async def test_close_with_guest_updates_both_edges(tmp_path):
|
||||
@pytest.mark.asyncio
|
||||
async def test_close_with_group_node_updates_group_summary(tmp_path):
|
||||
"""When a group_node row exists, scene close emits group_node_updated
|
||||
with a non-empty summary that mentions both bots' names (v2 naive
|
||||
concat of per-POV summaries)."""
|
||||
with a non-empty summary that mentions both bots' names. T70 swapped
|
||||
the Phase 2 naive concat for an LLM-merged summary; this regression
|
||||
test feeds bad-JSON merge responses so the helper falls back to the
|
||||
original naive-concat shape, preserving the original assertions."""
|
||||
db = tmp_path / "t.db"
|
||||
apply_migrations(db)
|
||||
import chat.state.group_node # noqa: F401 -- register handlers
|
||||
@@ -660,7 +662,11 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
|
||||
_seed_two_bot_scene(conn, with_group_node=True)
|
||||
project(conn)
|
||||
|
||||
client = MockLLMClient(canned=[host_canned, guest_canned])
|
||||
# 2 valid (host POV, guest POV) + 3 bad-JSON merge attempts ->
|
||||
# merge_group_summary falls back to the naive concat default.
|
||||
client = MockLLMClient(
|
||||
canned=[host_canned, guest_canned, "bad1", "bad2", "bad3"]
|
||||
)
|
||||
await apply_scene_close_summary(
|
||||
conn,
|
||||
client,
|
||||
@@ -675,8 +681,167 @@ async def test_close_with_group_node_updates_group_summary(tmp_path):
|
||||
gn = get_group_node(conn, "chat_bot_a")
|
||||
assert gn is not None
|
||||
assert gn["summary"] # non-empty
|
||||
# Naive concat surfaces both bot names in the group summary.
|
||||
# Naive-concat fallback surfaces both bot names in the group summary.
|
||||
assert "BotA" in gn["summary"]
|
||||
assert "BotB" in gn["summary"]
|
||||
# Phase 2 v2 keeps dynamic empty (Phase 3 polishes).
|
||||
# Naive-concat fallback keeps dynamic empty.
|
||||
assert gn["dynamic"] == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# T70: LLM-merged group meta-summary on scene close.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_group_summary_merges_per_pov_via_classifier_when_guest_present(
    tmp_path,
):
    """Guest present + group_node row: the merge call drives the group node.

    Scene close is expected to make three classifier calls (host POV, guest
    POV, then the group merge); the third reply must land verbatim in the
    group_node ``summary`` and ``dynamic`` fields.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)
    import chat.state.group_node  # noqa: F401 -- register handlers
    from chat.state.group_node import get_group_node

    host_reply = json.dumps(
        {
            "summary": "BotA appreciated the calm.",
            "knowledge_facts": [],
            "relationship_summary": "BotA felt steady.",
        }
    )
    guest_reply = json.dumps(
        {
            "summary": "BotB found the room friendly.",
            "knowledge_facts": [],
            "relationship_summary": "BotB warmed up.",
        }
    )
    merge_reply = json.dumps(
        {"summary": "merged group view", "dynamic": "warm rapport"}
    )
    # Queue order mirrors the call order in apply_scene_close_summary:
    # host POV summarize_scene, guest POV summarize_scene, then
    # merge_group_summary.
    reply_queue = [host_reply, guest_reply, merge_reply]

    with open_db(db_path) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        llm = MockLLMClient(canned=reply_queue)
        await apply_scene_close_summary(
            conn,
            llm,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        # Queue drained -> all three canned replies were consumed.
        assert llm._canned == []

        node = get_group_node(conn, "chat_bot_a")
        assert node is not None
        assert node["summary"] == "merged group view"
        assert node["dynamic"] == "warm rapport"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_group_summary_falls_back_to_naive_concat_on_classifier_failure(
    tmp_path,
):
    """Unparseable merge replies leave the group node with the naive concat.

    When every merge attempt returns bad JSON, the stored summary must be
    the Phase 2 ``"<name>: <summary>"`` concat of both POVs and ``dynamic``
    must stay empty.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)
    import chat.state.group_node  # noqa: F401 -- register handlers
    from chat.state.group_node import get_group_node

    host_reply = json.dumps(
        {
            "summary": "BotA appreciated the calm.",
            "knowledge_facts": [],
            "relationship_summary": "BotA felt steady.",
        }
    )
    guest_reply = json.dumps(
        {
            "summary": "BotB found the room friendly.",
            "knowledge_facts": [],
            "relationship_summary": "BotB warmed up.",
        }
    )
    # Two valid POV replies followed by three non-JSON replies for the merge
    # call (the suite's existing notes describe a retry-then-default path
    # with three attempts); the default is the naive-concat fallback.
    reply_queue = [host_reply, guest_reply, "bad1", "bad2", "bad3"]
    naive_concat = (
        "BotA: BotA appreciated the calm.\n\n"
        "BotB: BotB found the room friendly."
    )

    with open_db(db_path) as conn:
        _seed_two_bot_scene(conn, with_group_node=True)
        project(conn)

        llm = MockLLMClient(canned=reply_queue)
        await apply_scene_close_summary(
            conn,
            llm,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        node = get_group_node(conn, "chat_bot_a")
        assert node is not None
        assert node["summary"] == naive_concat
        assert node["dynamic"] == ""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_group_summary_skipped_when_no_guest(tmp_path):
    """Host-only scene close: no merge call and no group_node_updated event.

    Exactly one classifier reply (the host POV) must be consumed, and the
    event log must contain no ``group_node_updated`` row afterwards.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)
    host_reply = json.dumps(
        {
            "summary": "BotA helped you talk through the deadline anxiety.",
            "knowledge_facts": ["Deadline next Friday."],
            "relationship_summary": "BotA leaned in supportively.",
        }
    )

    with open_db(db_path) as conn:
        _seed_single_bot_scene(conn)
        project(conn)

        # A single canned reply: any additional classifier call would find
        # the queue empty (per the suite's notes MockLLMClient raises
        # IndexError then -- not verifiable from this block).
        llm = MockLLMClient(canned=[host_reply])
        await apply_scene_close_summary(
            conn,
            llm,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        # Only the host POV reply was consumed.
        assert llm._canned == []

        # The event log holds no group_node_updated rows.
        group_events = conn.execute(
            "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'"
        ).fetchall()
        assert group_events == []
|
||||
|
||||
Reference in New Issue
Block a user