feat: T44 multi-entity turn flow with interjection support

Rewrites post_turn for the multi-entity world:

- Addressee detection via case-insensitive whole-word match against the guest name; defaults to host on no-match or both-match.
- Multi-entity prompt assembly: forwards guest_id so the prompt sees the third party's activity / edges / group-node.
- Multi-witness memory write: record_turn_memory_for_present writes one memory per present bot witness when a guest is in the room.
- Multi-pair state-update: compute_state_updates_for_present emits one edge_update per directed pair (6 with a guest, 2 without).
- Interjection branch (T39): when a guest is present and the primary beat completes, the silent witness may follow on. detect_interjection decides; on True we stream a second narrative as the witness, append a second assistant_turn linked to the same user_turn_id, and re-run the multi-pair state update + memory write for the follow-on beat. Cancel collapses both halves; a cancelled interjection skips its downstream passes so we don't classifier-spam against a half-formed beat.
- Scene-close runs after both beats so apply_scene_close_summary sees the full closing scene; T45's guest-aware summarizer handles per-POV rewrites for each present witness.

regenerate.py mirrors the prompt / memory / state-update changes for 1:1 and multi-entity scenes. Per the Phase 2 spec, interjection regeneration is deferred to Phase 2.5 — regenerate only re-streams the addressee turn for v2.

Tests: adds 5 cases to tests/test_turn_flow.py covering the no-guest regression, multi-bot without interjection, multi-bot with interjection, scene-close per-POV rewrites, and addressee routing when the prose names a bot. Each test pins its own canned MockLLMClient queue with the call shape documented in the docstring.
2026-04-26 16:18:38 -04:00
parent 44c8735b27
commit c86b0df411
3 changed files with 1012 additions and 209 deletions
@@ -202,3 +202,487 @@ def test_get_chat_renders_existing_turns(client, tmp_path):
    body = response.text
    assert "hello" in body
    assert "Hi there." in body
+
+
+# ---------------------------------------------------------------------------
+# Phase 2 (T44) — multi-entity turn flow.
+#
+# These tests cover the post_turn flow when a guest is present: addressee
+# detection, multi-pair state-update + multi-witness memory writes, and
+# the optional interjection follow-on. Each test installs its own
+# MockLLMClient with a canned-response queue tailored to the call shape
+# of that scenario; the queue is documented at the top of each test so
+# the orchestration is auditable.
+# ---------------------------------------------------------------------------
+
+
+def _bot_payload(bot_id: str, name: str, persona: str = "") -> dict:
+    return {
+        "id": bot_id,
+        "name": name,
+        "persona": persona or f"persona for {name}",
+        "voice_samples": [],
+        "traits": [],
+        "backstory": "",
+        "initial_relationship_to_you": "",
+        "kickoff_prose": "...",
+    }
+
+
+def _seed_chat_with_guest(db_path: Path) -> None:
+    """Author host BotA + guest BotB, create a chat with both wired in,
+    and seed an open scene plus minimal activity rows so the prompt
+    assembler sees a third party. Edges are seeded for all six directed
+    pairs at the schema-default 50/50 baseline so multi-pair state
+    updates land cleanly."""
+    with open_db(db_path) as conn:
+        append_event(conn, kind="bot_authored", payload=_bot_payload("bot_a", "BotA"))
+        append_event(conn, kind="bot_authored", payload=_bot_payload("bot_b", "BotB"))
+        append_event(
+            conn,
+            kind="you_authored",
+            payload={"name": "Me", "pronouns": "they/them", "persona": ""},
+        )
+        append_event(
+            conn,
+            kind="chat_created",
+            payload={
+                "id": "chat_bot_a",
+                "host_bot_id": "bot_a",
+                "guest_bot_id": "bot_b",
+                "initial_time": "2026-04-26T20:00:00+00:00",
+                "narrative_anchor": "Day 1",
+                "weather": "",
+            },
+        )
+        # Container + open scene so scene_close detection has something
+        # to act on in the per-POV summary test.
+        append_event(
+            conn,
+            kind="container_created",
+            payload={
+                "chat_id": "chat_bot_a",
+                "name": "office",
+                "type": "workplace",
+                "properties": {},
+            },
+        )
+        append_event(
+            conn,
+            kind="scene_opened",
+            payload={
+                "chat_id": "chat_bot_a",
+                "container_id": 1,
+                "started_at": "2026-04-26T20:00:00+00:00",
+                "participants": ["you", "bot_a", "bot_b"],
+            },
+        )
+        # Seed all six directed edges so state-update writes land on
+        # initialized rows. Knowledge fact on bot_a -> you exercises
+        # the existing-fact preservation path.
+        for src, tgt, facts in [
+            ("bot_a", "you", ["coworker"]),
+            ("you", "bot_a", []),
+            ("bot_b", "you", []),
+            ("you", "bot_b", []),
+            ("bot_a", "bot_b", []),
+            ("bot_b", "bot_a", []),
+        ]:
+            append_event(
+                conn,
+                kind="edge_update",
+                payload={
+                    "source_id": src,
+                    "target_id": tgt,
+                    "chat_id": "chat_bot_a",
+                    "knowledge_facts": facts,
+                },
+            )
+        for entity_id, verb in [
+            ("you", "talking"),
+            ("bot_a", "listening"),
+            ("bot_b", "listening"),
+        ]:
+            append_event(
+                conn,
+                kind="activity_change",
+                payload={
+                    "entity_id": entity_id,
+                    "posture": "sitting",
+                    "action": {
+                        "verb": verb,
+                        "interruptible": True,
+                        "required_attention": "low",
+                        "expected_duration": "ongoing",
+                    },
+                    "attention": "",
+                    "holding": [],
+                    "status": {},
+                },
+            )
+        project(conn)
+
+
+def _override_llm(canned: list[str]) -> MockLLMClient:
+    """Wire a fresh ``MockLLMClient`` and return it so tests can introspect
+    the residual canned queue after the request."""
+    from chat.web.kickoff import get_llm_client
+
+    mock = MockLLMClient(canned=list(canned))
+    app.dependency_overrides[get_llm_client] = lambda: mock
+    return mock
+
+
+def _zero_state() -> str:
+    return json.dumps(
+        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
+    )
+
+
+@pytest.fixture
+def app_state_setup(tmp_path, monkeypatch):
+    """Same env wiring as the existing ``client`` fixture but without a
+    pre-installed MockLLMClient — the multi-entity tests pin their own
+    canned queues per scenario.
+    """
+    cfg = tmp_path / "config.toml"
+    cfg.write_text('featherless_api_key = "test"\n')
+    monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
+    db = tmp_path / "test.db"
+    monkeypatch.setenv("CHAT_DB_PATH", str(db))
+    with TestClient(app) as c:
+        app.state.background_worker.enabled = False
+        yield c
+    app.dependency_overrides.clear()
+
+
+def test_single_bot_turn_no_guest_regression(app_state_setup, tmp_path):
+    """No-guest regression: the canned-response queue remains parse +
+    narrative + 2 state-updates. The interjection branch is skipped because
+    the chat has no guest, so ``detect_interjection`` is NOT invoked.
+    Ends with one user_turn, one assistant_turn, two edge_updates, and a
+    single ``memory_written``.
+    """
+    _seed(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "hello"}]}
+    )
+    mock = _override_llm(
+        [canned_parse, "Hi there.", _zero_state(), _zero_state()]
+    )
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "hello"}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+
+    # No guest -> no interjection classifier call -> queue fully drained.
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        cur = conn.execute(
+            "SELECT kind FROM event_log "
+            "WHERE kind IN ('user_turn', 'assistant_turn', 'edge_update', "
+            "  'memory_written') ORDER BY id"
+        )
+        kinds = [r[0] for r in cur.fetchall()]
+    user_turns = [k for k in kinds if k == "user_turn"]
+    assistant_turns = [k for k in kinds if k == "assistant_turn"]
+    edge_updates_after_seed = [k for k in kinds if k == "edge_update"]
+    memory_writes = [k for k in kinds if k == "memory_written"]
+    assert len(user_turns) == 1
+    assert len(assistant_turns) == 1
+    # Seed adds exactly one edge_update (bot_a -> you); the post-turn
+    # pass adds two more for a total of three.
+    assert len(edge_updates_after_seed) == 3
+    assert len(memory_writes) == 1
+
+
+def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
+    """Chat has a guest; ``detect_interjection`` returns False. Verify:
+    1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates + 2
+    memory_written events. A single turn_html broadcast is expected but
+    is not asserted by this test.
+
+    Canned queue (10 calls):
+      1. parse_turn
+      2. narrative stream (primary, addressee = host because the prose
+         doesn't name the guest)
+      3-8. 6 state-update calls (one per directed pair across {you,
+         bot_a, bot_b})
+      9. detect_interjection -> should_interject=False
+      10. detect_scene_close -> should_close=False
+    """
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "hello room"}]}
+    )
+    canned = [
+        canned_parse,
+        "Greetings.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_interject": False, "reason": "calm"}),
+        json.dumps({"should_close": False, "reason": "no signal"}),
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "hello room"}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    # All 10 canned slots should have been consumed.
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        # Count post-turn edge_updates (i.e. those after the latest
+        # assistant_turn id).
+        max_at = conn.execute(
+            "SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
+        ).fetchone()[0]
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log "
+            "WHERE kind = 'edge_update' AND id > ?",
+            (max_at,),
+        )
+        post_turn_edge_updates = cur.fetchone()[0]
+
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'user_turn'"
+        )
+        user_turn_count = cur.fetchone()[0]
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'assistant_turn'"
+        )
+        assistant_turn_count = cur.fetchone()[0]
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
+        )
+        memory_count = cur.fetchone()[0]
+
+    assert user_turn_count == 1
+    assert assistant_turn_count == 1
+    assert post_turn_edge_updates == 6
+    assert memory_count == 2
+
+
+def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
+    """Chat has a guest; ``detect_interjection`` returns True. Verify:
+    1 user_turn + 2 assistant_turns + (6 + 6) post-turn edge_updates +
+    4 memory_written events.
+
+    Canned queue (17 calls):
+      1. parse_turn
+      2. narrative stream (primary)
+      3-8. 6 state-update calls (post-primary)
+      9. detect_interjection -> should_interject=True
+      10. narrative stream (interjection)
+      11-16. 6 state-update calls (post-interjection)
+      17. detect_scene_close -> should_close=False
+    """
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "tell me"}]}
+    )
+    canned = [
+        canned_parse,
+        "Primary beat.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_interject": True, "reason": "jealous"}),
+        "Interjection beat!",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_close": False, "reason": "no signal"}),
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "tell me"}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'assistant_turn'"
+        )
+        assistant_count = cur.fetchone()[0]
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
+        )
+        memory_count = cur.fetchone()[0]
+        # All edge_updates after the FIRST assistant_turn are post-turn.
+        first_at = conn.execute(
+            "SELECT MIN(id) FROM event_log WHERE kind = 'assistant_turn'"
+        ).fetchone()[0]
+        post_turn_edges = conn.execute(
+            "SELECT COUNT(*) FROM event_log "
+            "WHERE kind = 'edge_update' AND id > ?",
+            (first_at,),
+        ).fetchone()[0]
+
+        # Both assistant_turn payloads should reference the same user_turn
+        # and the second one tags ``interjection_of`` the first speaker.
+        rows = conn.execute(
+            "SELECT payload_json FROM event_log "
+            "WHERE kind = 'assistant_turn' ORDER BY id"
+        ).fetchall()
+        first_payload = json.loads(rows[0][0])
+        second_payload = json.loads(rows[1][0])
+
+    assert assistant_count == 2
+    assert memory_count == 4
+    assert post_turn_edges == 12
+    assert first_payload["text"] == "Primary beat."
+    assert second_payload["text"] == "Interjection beat!"
+    # The silent witness is the bot that wasn't the primary addressee.
+    assert second_payload["interjection_of"] == first_payload["speaker_id"]
+    assert second_payload["speaker_id"] != first_payload["speaker_id"]
+    assert first_payload["user_turn_id"] == second_payload["user_turn_id"]
+
+
+def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
+    app_state_setup, tmp_path
+):
+    """Chat has a guest, prose hard-signals a scene close, classifier
+    confirms. Verify a ``scene_closed`` event lands and per-POV summary
+    rewrites fire for both bots (memory.pov_summary changes for each).
+    Interjection short-circuits at False so the queue stays compact.
+
+    Canned queue (12 calls):
+      1. parse_turn
+      2. narrative stream (primary)
+      3-8. 6 state-update calls
+      9. detect_interjection -> False (no follow-on stream)
+      10. detect_scene_close -> True
+      11. apply_scene_close_summary host POV
+      12. apply_scene_close_summary guest POV
+    """
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {
+            "segments": [
+                {"kind": "narration", "text": "we are done here, fade out"}
+            ]
+        }
+    )
+    pov_payload = json.dumps(
+        {
+            "summary": "BotA noticed the day winding down.",
+            "knowledge_facts": [],
+            "relationship_summary": "warmer",
+        }
+    )
+    pov_payload_guest = json.dumps(
+        {
+            "summary": "BotB watched the scene close.",
+            "knowledge_facts": [],
+            "relationship_summary": "warmer",
+        }
+    )
+    canned = [
+        canned_parse,
+        "Goodnight.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_interject": False, "reason": "calm"}),
+        json.dumps({"should_close": True, "reason": "fade out signaled"}),
+        pov_payload,
+        pov_payload_guest,
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "we are done here, fade out"}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        cur = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
+        )
+        scene_close_count = cur.fetchone()[0]
+        # One memory_pov_summary manual_edit per witness.
+        cur = conn.execute(
+            "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'"
+        )
+        manual_edits = [json.loads(r[0]) for r in cur.fetchall()]
+        pov_edits = [
+            e for e in manual_edits
+            if e.get("target_kind") == "memory_pov_summary"
+        ]
+        # After the rewrite, bot_a's scene-1 memory carries the host POV
+        # and bot_b's scene-1 memory carries the guest POV.
+        host_pov = conn.execute(
+            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = 1",
+            ("bot_a",),
+        ).fetchone()
+        guest_pov = conn.execute(
+            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = 1",
+            ("bot_b",),
+        ).fetchone()
+
+    assert scene_close_count == 1
+    # Two memory rewrites — one per witness.
+    assert len(pov_edits) == 2
+    assert host_pov is not None and "BotA noticed" in host_pov[0]
+    assert guest_pov is not None and "BotB watched" in guest_pov[0]
+
+
+def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
+    """Prose that names the guest by name routes the primary turn to the
+    guest. Interjection (when fired) makes the host the silent witness
+    and the second assistant_turn carries the host as speaker.
+
+    Canned queue (17 calls): parse_turn, primary narrative, 6 state
+    updates, detect_interjection, interjection narrative, 6 more state
+    updates, then the trailing detect_scene_close decision.
+    """
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "BotB, what do you think?"}]}
+    )
+    canned = [
+        canned_parse,
+        "BotB pondering.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_interject": True, "reason": "host wants in"}),
+        "BotA chiming in.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_close": False, "reason": "no signal"}),
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns",
+            data={"prose": "BotB, what do you think?"},
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        rows = conn.execute(
+            "SELECT payload_json FROM event_log "
+            "WHERE kind = 'assistant_turn' ORDER BY id"
+        ).fetchall()
+        primary_payload = json.loads(rows[0][0])
+        interjection_payload = json.loads(rows[1][0])
+
+    # Primary speaker is the guest because the prose names BotB and not
+    # BotA (case-insensitive whole-word match).
+    assert primary_payload["speaker_id"] == "bot_b"
+    # Interjection follow-on goes to the silent witness — the host.
+    assert interjection_payload["speaker_id"] == "bot_a"
+    assert interjection_payload["interjection_of"] == "bot_b"