fix: scope thread detection transcript to closing scene (T80.2)

``apply_scene_close_summary`` fed ``detect_threads`` the chat-wide last-50 turns. When a chat has accumulated multiple scenes' worth of dialogue, that bleeds prior-scene turns into the second close's classifier prompt and risks mis-attributing threads (closing one that opened earlier, re-opening one that already closed). Add an optional ``since_event_id`` kwarg to ``_read_recent_dialogue`` that lower-bounds the returned turns by event_log id, plus a ``_scene_opened_event_id`` helper that resolves the scene-open event for a given scene_id. Wire both into the thread-detection call site so its ``scene_transcript`` holds only the closing scene's turns. The per-POV summarizer keeps the chat-wide approximation it had before — that's intentional. Adds ``test_thread_detection_uses_scene_scoped_transcript``.
2026-04-26 21:48:44 -04:00
parent d123684f9a
commit dae481eb92
2 changed files with 238 additions and 14 deletions
@@ -1490,3 +1490,138 @@ async def test_scene_close_re_run_does_not_double_suffix(tmp_path):
            # from a row whose text already contained the suffix).
            inner_count = pov.count("Key quotes:")
            assert inner_count == 1
+
+
+@pytest.mark.asyncio
+async def test_thread_detection_uses_scene_scoped_transcript(
+    tmp_path, monkeypatch
+):
+    """T80.2: the second scene close in a chat must pass
+    ``detect_threads`` a transcript holding that scene's turns only.
+
+    A chat-wide last-50 window would also carry the first scene's
+    turns into the call and risk mis-closing threads, so the test
+    plants distinct markers in each scene and inspects what the second
+    close hands over."""
+    from chat.eventlog.log import append_and_apply
+    from chat.services import thread_detection as td_mod
+
+    summary_json = json.dumps(
+        {
+            "summary": "BotA had a quick chat.",
+            "knowledge_facts": [],
+            "relationship_summary": "Steady.",
+        }
+    )
+    seen_transcripts: list[list[dict]] = []
+
+    async def fake_detect_threads(client, **kwargs):
+        # Record a copy of the transcript passed on each call.
+        seen_transcripts.append(list(kwargs["scene_transcript"]))
+        return td_mod.ThreadDetectionResult()
+
+    monkeypatch.setattr(td_mod, "detect_threads", fake_detect_threads)
+
+    async def close_scene(conn, scene_id):
+        # Every close uses its own mock client with one canned reply.
+        llm = MockLLMClient(canned=[summary_json])
+        await apply_scene_close_summary(
+            conn,
+            llm,
+            classifier_model="x",
+            chat_id="chat_bot_a",
+            scene_id=scene_id,
+            host_bot_id="bot_a",
+        )
+
+    # Two extra scene-1 turns whose text is unmistakably scene 1.
+    scene_one_turns = [
+        (
+            "user_turn",
+            {
+                "chat_id": "chat_bot_a",
+                "prose": "SCENE_ONE_USER_TURN",
+                "segments": [],
+            },
+        ),
+        (
+            "assistant_turn",
+            {
+                "chat_id": "chat_bot_a",
+                "speaker_id": "bot_a",
+                "text": "SCENE_ONE_BOT_TURN",
+                "truncated": False,
+                "user_turn_id": 2,
+            },
+        ),
+    ]
+
+    # Scene 2: its open event, a memory row, then two marker turns.
+    scene_two_events = [
+        (
+            "scene_opened",
+            {
+                "chat_id": "chat_bot_a",
+                "container_id": 1,
+                "started_at": "2026-04-26T21:00:00+00:00",
+                "participants": ["you", "bot_a"],
+            },
+        ),
+        (
+            "memory_written",
+            {
+                "owner_id": "bot_a",
+                "chat_id": "chat_bot_a",
+                "scene_id": 2,
+                "pov_summary": "Original (scene 2)",
+                "witness_you": 1,
+                "witness_host": 1,
+                "witness_guest": 0,
+                "significance": 1,
+            },
+        ),
+        (
+            "user_turn",
+            {
+                "chat_id": "chat_bot_a",
+                "prose": "SCENE_TWO_USER_TURN",
+                "segments": [],
+            },
+        ),
+        (
+            "assistant_turn",
+            {
+                "chat_id": "chat_bot_a",
+                "speaker_id": "bot_a",
+                "text": "SCENE_TWO_BOT_TURN",
+                "truncated": False,
+                "user_turn_id": 3,
+            },
+        ),
+    ]
+
+    db_path = tmp_path / "t.db"
+    apply_migrations(db_path)
+    with open_db(db_path) as conn:
+        # Seed scene 1 with the shared helper, append the marker
+        # turns, project everything, then close the scene.
+        _seed_single_bot_scene(conn)
+        for kind, payload in scene_one_turns:
+            append_event(conn, kind=kind, payload=payload)
+        project(conn)
+        await close_scene(conn, 1)
+
+        # append_and_apply projects each new event incrementally, so
+        # the already-applied seed events are not re-run.
+        for kind, payload in scene_two_events:
+            append_and_apply(conn, kind=kind, payload=payload)
+        await close_scene(conn, 2)
+
+        # One capture per close; the second must be scene-2 only.
+        assert len(seen_transcripts) == 2
+        second_close = seen_transcripts[1]
+        joined = " ".join(
+            turn.get("text", "") for turn in second_close
+        )
+        assert "SCENE_TWO" in joined
+        assert "SCENE_ONE" not in joined