fix: scope thread detection transcript to closing scene (T80.2)

apply_scene_close_summary fed detect_threads the chat-wide last-50
turns. When a chat has accumulated multiple scenes' worth of dialogue,
that bleeds prior-scene turns into the second close's classifier prompt
and risks mis-attributing threads (closing one that opened earlier,
re-opening one that already closed).

Add an optional ``since_event_id`` kwarg to ``_read_recent_dialogue``
that lower-bounds by event_log id, plus a ``_scene_opened_event_id``
helper that resolves the scene-open event for a given scene_id. Wire
both into the thread-detection call site so its scene_transcript
holds only the closing scene's turns. The per-POV summarizer keeps the
chat-wide approximation it had before — that's intentional.

Adds test_thread_detection_uses_scene_scoped_transcript.
This commit is contained in:
Joseph Doherty
2026-04-26 21:48:44 -04:00
parent d123684f9a
commit dae481eb92
2 changed files with 238 additions and 14 deletions
+135
View File
@@ -1490,3 +1490,138 @@ async def test_scene_close_re_run_does_not_double_suffix(tmp_path):
# from a row whose text already contained the suffix).
inner_count = pov.count("Key quotes:")
assert inner_count == 1
@pytest.mark.asyncio
async def test_thread_detection_uses_scene_scoped_transcript(
tmp_path, monkeypatch
):
"""T80.2: when a chat has multiple closed scenes, the second scene's
close must hand ``detect_threads`` ONLY the second scene's turns —
not the chat-wide last-50, which would bleed in the first scene's
transcript and risk mis-closing threads."""
from chat.services import thread_detection as td_mod
canned = json.dumps(
{
"summary": "BotA had a quick chat.",
"knowledge_facts": [],
"relationship_summary": "Steady.",
}
)
captured_transcripts: list[list[dict]] = []
async def capturing_detect_threads(client, **kwargs):
captured_transcripts.append(list(kwargs["scene_transcript"]))
return td_mod.ThreadDetectionResult()
monkeypatch.setattr(td_mod, "detect_threads", capturing_detect_threads)
db = tmp_path / "t.db"
apply_migrations(db)
with open_db(db) as conn:
# Seed scene 1 + 3 turns + close.
_seed_single_bot_scene(conn)
# Add two extra distinct turns inside scene 1 so the transcript
# has clearly-scene-1 markers we can assert on.
append_event(
conn,
kind="user_turn",
payload={
"chat_id": "chat_bot_a",
"prose": "SCENE_ONE_USER_TURN",
"segments": [],
},
)
append_event(
conn,
kind="assistant_turn",
payload={
"chat_id": "chat_bot_a",
"speaker_id": "bot_a",
"text": "SCENE_ONE_BOT_TURN",
"truncated": False,
"user_turn_id": 2,
},
)
project(conn)
# Close scene 1.
client = MockLLMClient(canned=[canned])
await apply_scene_close_summary(
conn,
client,
classifier_model="x",
chat_id="chat_bot_a",
scene_id=1,
host_bot_id="bot_a",
)
# Open scene 2 with distinct dialogue. Use append_and_apply so
# the new events project incrementally without re-running the
# already-applied seed events.
from chat.eventlog.log import append_and_apply
append_and_apply(
conn,
kind="scene_opened",
payload={
"chat_id": "chat_bot_a",
"container_id": 1,
"started_at": "2026-04-26T21:00:00+00:00",
"participants": ["you", "bot_a"],
},
)
append_and_apply(
conn,
kind="memory_written",
payload={
"owner_id": "bot_a",
"chat_id": "chat_bot_a",
"scene_id": 2,
"pov_summary": "Original (scene 2)",
"witness_you": 1,
"witness_host": 1,
"witness_guest": 0,
"significance": 1,
},
)
append_and_apply(
conn,
kind="user_turn",
payload={
"chat_id": "chat_bot_a",
"prose": "SCENE_TWO_USER_TURN",
"segments": [],
},
)
append_and_apply(
conn,
kind="assistant_turn",
payload={
"chat_id": "chat_bot_a",
"speaker_id": "bot_a",
"text": "SCENE_TWO_BOT_TURN",
"truncated": False,
"user_turn_id": 3,
},
)
# Close scene 2.
client2 = MockLLMClient(canned=[canned])
await apply_scene_close_summary(
conn,
client2,
classifier_model="x",
chat_id="chat_bot_a",
scene_id=2,
host_bot_id="bot_a",
)
# The second close's transcript holds only scene-2 markers.
assert len(captured_transcripts) == 2
scene_two_transcript = captured_transcripts[1]
joined = " ".join(t.get("text", "") for t in scene_two_transcript)
assert "SCENE_TWO" in joined
assert "SCENE_ONE" not in joined