fix: scope thread detection transcript to closing scene (T80.2)
apply_scene_close_summary fed detect_threads the chat-wide last-50 turns. When a chat has accumulated multiple scenes' worth of dialogue, that bleeds prior-scene turns into the second close's classifier prompt and risks mis-attributing threads (closing one that opened earlier, re-opening one that already closed). Add an optional ``since_event_id`` kwarg to ``_read_recent_dialogue`` that lower-bounds by event_log id, plus a ``_scene_opened_event_id`` helper that resolves the scene-open event for a given scene_id. Wire both into the thread-detection call site so its scene_transcript holds only the closing scene's turns. The per-POV summarizer keeps the chat-wide approximation it had before — that's intentional. Adds test_thread_detection_uses_scene_scoped_transcript.
This commit is contained in:
@@ -1490,3 +1490,138 @@ async def test_scene_close_re_run_does_not_double_suffix(tmp_path):
|
||||
# from a row whose text already contained the suffix).
|
||||
inner_count = pov.count("Key quotes:")
|
||||
assert inner_count == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_thread_detection_uses_scene_scoped_transcript(
    tmp_path, monkeypatch
):
    """T80.2: closing a later scene must scope the thread-detection input.

    Two scenes are recorded in the same chat and closed one after the
    other. ``detect_threads`` is replaced with a stub that records the
    ``scene_transcript`` it receives; the transcript captured for the
    second close must mention the scene-2 markers and none of the
    scene-1 markers.
    """
    from chat.services import thread_detection as td_mod
    from chat.eventlog.log import append_and_apply

    chat_id = "chat_bot_a"
    bot_id = "bot_a"

    # Canned classifier reply handed to the mock LLM client on every close.
    summary_json = json.dumps(
        {
            "summary": "BotA had a quick chat.",
            "knowledge_facts": [],
            "relationship_summary": "Steady.",
        }
    )

    captured_transcripts: list[list[dict]] = []

    async def capturing_detect_threads(client, **kwargs):
        # Snapshot the transcript so later mutation cannot affect the check.
        captured_transcripts.append(list(kwargs["scene_transcript"]))
        return td_mod.ThreadDetectionResult()

    monkeypatch.setattr(td_mod, "detect_threads", capturing_detect_threads)

    async def close_scene(conn, scene_id):
        # Each close gets its own mock client primed with one canned reply.
        await apply_scene_close_summary(
            conn,
            MockLLMClient(canned=[summary_json]),
            classifier_model="x",
            chat_id=chat_id,
            scene_id=scene_id,
            host_bot_id=bot_id,
        )

    # Scene-1 turns appended on top of the seeded scene; the prose/text
    # values act as unmistakable scene-1 markers for the final assertions.
    scene_one_user_payload = {
        "chat_id": chat_id,
        "prose": "SCENE_ONE_USER_TURN",
        "segments": [],
    }
    scene_one_bot_payload = {
        "chat_id": chat_id,
        "speaker_id": bot_id,
        "text": "SCENE_ONE_BOT_TURN",
        "truncated": False,
        "user_turn_id": 2,
    }

    # Scene-2 events, in the exact order they must be appended.
    scene_two_events = [
        (
            "scene_opened",
            {
                "chat_id": chat_id,
                "container_id": 1,
                "started_at": "2026-04-26T21:00:00+00:00",
                "participants": ["you", "bot_a"],
            },
        ),
        (
            "memory_written",
            {
                "owner_id": bot_id,
                "chat_id": chat_id,
                "scene_id": 2,
                "pov_summary": "Original (scene 2)",
                "witness_you": 1,
                "witness_host": 1,
                "witness_guest": 0,
                "significance": 1,
            },
        ),
        (
            "user_turn",
            {
                "chat_id": chat_id,
                "prose": "SCENE_TWO_USER_TURN",
                "segments": [],
            },
        ),
        (
            "assistant_turn",
            {
                "chat_id": chat_id,
                "speaker_id": bot_id,
                "text": "SCENE_TWO_BOT_TURN",
                "truncated": False,
                "user_turn_id": 3,
            },
        ),
    ]

    db = tmp_path / "t.db"
    apply_migrations(db)
    with open_db(db) as conn:
        # Seed the baseline scene (helper defined elsewhere in this file),
        # then add the two marker turns and project everything.
        _seed_single_bot_scene(conn)
        append_event(conn, kind="user_turn", payload=scene_one_user_payload)
        append_event(
            conn, kind="assistant_turn", payload=scene_one_bot_payload
        )
        project(conn)

        # First close: scene 1.
        await close_scene(conn, 1)

        # Scene 2 goes through append_and_apply so the new events project
        # incrementally without re-running the already-applied seed events.
        for event_kind, event_payload in scene_two_events:
            append_and_apply(conn, kind=event_kind, payload=event_payload)

        # Second close: scene 2.
        await close_scene(conn, 2)

    # One capture per close; the second must contain scene-2 markers only.
    assert len(captured_transcripts) == 2
    scene_two_transcript = captured_transcripts[1]
    joined = " ".join(t.get("text", "") for t in scene_two_transcript)
    assert "SCENE_TWO" in joined
    assert "SCENE_ONE" not in joined
|
||||
|
||||
Reference in New Issue
Block a user