feat: meanwhile turn flow (host+guest, no you) (T64)

2026-04-26 21:00:56 -04:00
parent c9d58b8229
commit cf43ba0993
5 changed files with 1061 additions and 5 deletions
@@ -0,0 +1,560 @@
+"""Meanwhile-mode turn flow (T64).
+
+A meanwhile scene runs entirely between two bots — host + guest — with
+"you" absent. The user manually advances the scene by POSTing prose to
+the existing ``/chats/<id>/turns`` endpoint; the route detects the active
+meanwhile scene at the start of ``post_turn`` and dispatches to the
+``process_meanwhile_turn`` controller in ``chat/web/meanwhile.py``.
+
+Coverage:
+
+1. Memory writes for a meanwhile turn carry witness ``[you=0, host=1,
+   guest=1]`` for both the host's and the guest's per-POV memory rows.
+2. State updates after a meanwhile turn run for exactly 2 directed pairs
+   (host -> guest, guest -> host) — no you-related pairs fire.
+3. Speakers alternate across consecutive meanwhile turns: the host
+   speaks first (no prior meanwhile assistant_turn), the guest speaks
+   second (the prior turn's speaker was the host, so this turn's
+   speaker is the OTHER bot).
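+4. Scene-close on a meanwhile scene writes per-POV summaries for host +
+   guest only — no "you" POV row is written, mirroring the no-you
+   present_set of the meanwhile scene.
+5. The streaming task for a meanwhile turn is registered in the
+   chat-keyed ``_in_flight_tasks`` registry while the stream is running
+   and removed once the response returns, so the cancel route can stop
+   meanwhile beats.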
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+from chat.app import app
+from chat.db.connection import open_db
+from chat.eventlog.log import append_event
+from chat.eventlog.projector import project
+from chat.llm.mock import MockLLMClient
+import chat.state.meanwhile  # noqa: F401  (registers handlers)
+
+
+def _bot_payload(bot_id: str, name: str) -> dict:
+    """Build a minimal ``bot_authored`` event payload for a test bot.
+
+    Only ``id``, ``name`` and the persona text vary with the arguments;
+    every other field is a fixed placeholder. A new dict (with new empty
+    lists) is produced on each call.
+    """
+    payload = dict(
+        id=bot_id,
+        name=name,
+        persona=f"persona for {name}",
+        voice_samples=[],
+        traits=[],
+        backstory="",
+        initial_relationship_to_you="",
+        kickoff_prose="...",
+    )
+    return payload
+
+
+def _seed_meanwhile_chat(db_path: Path) -> None:
+    """Write the event history every test in this module starts from.
+
+    The events are appended in a fixed order and projected once at the
+    end:
+
+    * two ``bot_authored`` events (``bot_a`` / ``bot_b``) and one
+      ``you_authored`` event;
+    * ``chat_created`` for ``chat_bot_a`` with ``bot_a`` as host and
+      ``bot_b`` as guest, plus a ``container_created`` named "office";
+    * ``scene_opened`` for the parent scene, whose participants include
+      "you";
+    * ``meanwhile_scene_started`` for child scene 2 (parent scene 1)
+      naming only the two bots;
+    * one ``edge_update`` per directed bot pair, carrying empty
+      ``knowledge_facts`` and no explicit affinity/trust values;
+    * one ``activity_change`` per bot (``bot_a`` listening, ``bot_b``
+      talking).
+    """
+
+    def _activity(entity_id: str, verb: str) -> dict:
+        # Built fresh per call so no two payloads share mutable state.
+        return {
+            "entity_id": entity_id,
+            "posture": "sitting",
+            "action": {
+                "verb": verb,
+                "interruptible": True,
+                "required_attention": "low",
+                "expected_duration": "ongoing",
+            },
+            "attention": "",
+            "holding": [],
+            "status": {},
+        }
+
+    # (kind, payload) pairs in the exact order they must hit the log.
+    timeline = [
+        ("bot_authored", _bot_payload("bot_a", "BotA")),
+        ("bot_authored", _bot_payload("bot_b", "BotB")),
+        (
+            "you_authored",
+            {"name": "Me", "pronouns": "they/them", "persona": ""},
+        ),
+        (
+            "chat_created",
+            {
+                "id": "chat_bot_a",
+                "host_bot_id": "bot_a",
+                "guest_bot_id": "bot_b",
+                "initial_time": "2026-04-26T20:00:00+00:00",
+                "narrative_anchor": "Day 1",
+                "weather": "",
+            },
+        ),
+        (
+            "container_created",
+            {
+                "chat_id": "chat_bot_a",
+                "name": "office",
+                "type": "workplace",
+                "properties": {},
+            },
+        ),
+        # The parent you-scene is opened before its meanwhile child.
+        (
+            "scene_opened",
+            {
+                "chat_id": "chat_bot_a",
+                "container_id": 1,
+                "started_at": "2026-04-26T20:00:00+00:00",
+                "participants": ["you", "bot_a", "bot_b"],
+            },
+        ),
+        # Child scene: the two bots only, linked back to scene 1.
+        (
+            "meanwhile_scene_started",
+            {
+                "scene_id": 2,
+                "chat_id": "chat_bot_a",
+                "parent_scene_id": 1,
+                "host_bot_id": "bot_a",
+                "guest_bot_id": "bot_b",
+                "started_at": "2026-04-26T20:05:00+00:00",
+            },
+        ),
+    ]
+    # Both directed bot-to-bot edges, so the post-turn state updates
+    # find existing rows to write to.
+    timeline += [
+        (
+            "edge_update",
+            {
+                "source_id": source,
+                "target_id": target,
+                "chat_id": "chat_bot_a",
+                "knowledge_facts": [],
+            },
+        )
+        for source, target in (("bot_a", "bot_b"), ("bot_b", "bot_a"))
+    ]
+    # One current activity per bot.
+    timeline += [
+        ("activity_change", _activity(entity, verb))
+        for entity, verb in (("bot_a", "listening"), ("bot_b", "talking"))
+    ]
+
+    with open_db(db_path) as conn:
+        for kind, payload in timeline:
+            append_event(conn, kind=kind, payload=payload)
+        project(conn)
+
+
+def _override_llm(canned: list[str]) -> MockLLMClient:
+    """Install a ``MockLLMClient`` as the app's LLM dependency.
+
+    The mock receives a copy of ``canned``, so the caller's list is left
+    untouched. The override is registered under ``get_llm_client`` in
+    ``app.dependency_overrides``; callers are responsible for clearing
+    it afterwards. Returns the mock so tests can inspect it.
+    """
+    from chat.web.kickoff import get_llm_client
+
+    client = MockLLMClient(canned=[*canned])
+    overrides = app.dependency_overrides
+    overrides[get_llm_client] = lambda: client
+    return client
+
+
+def _zero_state() -> str:
+    """Return the JSON text of a state update that changes nothing:
+    zero affinity delta, zero trust delta and no knowledge facts.
+    """
+    no_change = {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
+    return json.dumps(no_change)
+
+
+@pytest.fixture
+def app_state_setup(tmp_path, monkeypatch):
+    """Yield a ``TestClient`` for the app, pointed at per-test files.
+
+    A one-line config file and a database path are created under
+    ``tmp_path`` and exported through ``CHAT_CONFIG_PATH`` and
+    ``CHAT_DB_PATH`` before the client starts. The background worker is
+    switched off while the client is open, and any dependency overrides
+    are cleared once the client has shut down.
+    """
+    config_file = tmp_path / "config.toml"
+    config_file.write_text('featherless_api_key = "test"\n')
+    database_file = tmp_path / "test.db"
+    env_paths = (
+        ("CHAT_CONFIG_PATH", config_file),
+        ("CHAT_DB_PATH", database_file),
+    )
+    for variable, target in env_paths:
+        monkeypatch.setenv(variable, str(target))
+    with TestClient(app) as client:
+        app.state.background_worker.enabled = False
+        yield client
+    app.dependency_overrides.clear()
+
+
+def test_meanwhile_turn_writes_memories_with_witness_0_1_1(
+    app_state_setup, tmp_path
+):
+    """One meanwhile turn yields two ``memory_written`` events, one owned
+    by each bot, and both carry witness flags ``[you=0, host=1,
+    guest=1]`` because "you" is not in the scene.
+
+    The mock LLM is given four canned replies, consumed in order:
+      1. parse_turn (classification of the user's prose)
+      2. narrative stream (the host, as there is no earlier turn)
+      3. state-update for bot_a -> bot_b
+      4. state-update for bot_b -> bot_a
+    """
+    db_file = tmp_path / "test.db"
+    prose = "they exchange a glance"
+    _seed_meanwhile_chat(db_file)
+    llm = _override_llm(
+        [
+            json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
+            "BotA leans in. *quietly* Tell me what you saw.",
+            _zero_state(),
+            _zero_state(),
+        ]
+    )
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": prose}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    # Every canned reply must have been used up.
+    assert llm._canned == []
+
+    query = (
+        "SELECT payload_json FROM event_log WHERE kind = 'memory_written' "
+        "ORDER BY id"
+    )
+    with open_db(db_file) as conn:
+        written = [json.loads(raw) for (raw,) in conn.execute(query).fetchall()]
+
+    assert len(written) == 2
+    assert sorted(entry["owner_id"] for entry in written) == ["bot_a", "bot_b"]
+    for entry in written:
+        flags = (
+            entry["witness_you"],
+            entry["witness_host"],
+            entry["witness_guest"],
+        )
+        assert flags == (0, 1, 1), entry
+
+
+def test_meanwhile_turn_emits_2_edge_updates_only(app_state_setup, tmp_path):
+    """A meanwhile turn runs state-update for exactly 2 directed pairs:
+    host -> guest and guest -> host. No you-related pairs fire.
+
+    The seed data already contains two ``edge_update`` events, so only
+    events whose id is greater than the newest ``assistant_turn`` event
+    are counted as belonging to the turn.
+    """
+    _seed_meanwhile_chat(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "narration", "text": "they whisper"}]}
+    )
+    canned = [
+        canned_parse,
+        "BotA whispers. *softly* I noticed something today.",
+        _zero_state(),
+        _zero_state(),
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "they whisper"}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    # All four canned replies must have been consumed by the turn.
+    assert mock._canned == []
+
+    with open_db(tmp_path / "test.db") as conn:
+        # Edge updates landed AFTER the assistant_turn (i.e. excluding
+        # the seed updates done before the request).
+        max_at = conn.execute(
+            "SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
+        ).fetchone()[0]
+        # MAX() over zero rows is NULL. Bound into ``id > ?`` it matches
+        # nothing, and the test would then fail on a misleading
+        # "0 == 2" instead of naming the real problem.
+        assert max_at is not None, "no assistant_turn event was written"
+        rows = conn.execute(
+            "SELECT payload_json FROM event_log "
+            "WHERE kind = 'edge_update' AND id > ? ORDER BY id",
+            (max_at,),
+        ).fetchall()
+        payloads = [json.loads(r[0]) for r in rows]
+
+    # Exactly 2 post-turn edge_update events.
+    assert len(payloads) == 2
+    pairs = sorted((p["source_id"], p["target_id"]) for p in payloads)
+    assert pairs == [("bot_a", "bot_b"), ("bot_b", "bot_a")]
+    # And NO you-related pair leaked in.
+    for p in payloads:
+        assert p["source_id"] != "you", p
+        assert p["target_id"] != "you", p
+
+
+def test_meanwhile_turn_alternates_speaker(app_state_setup, tmp_path):
+    """Two consecutive meanwhile turns are spoken by different bots.
+
+    With no earlier meanwhile ``assistant_turn`` on the scene the host
+    speaks first; the following turn is expected to go to the other bot,
+    the guest. Each ``assistant_turn`` payload is also expected to carry
+    ``meanwhile_scene_id`` (so the lookup can be scoped to this scene)
+    and the ``present_set_kind`` discriminator.
+    """
+    db_file = tmp_path / "test.db"
+    _seed_meanwhile_chat(db_file)
+
+    # (user prose, canned narrative) for each of the two turns.
+    beats = (
+        ("they pause", "BotA speaks first. *quietly*"),
+        ("and again", "BotB replies. *thoughtfully*"),
+    )
+    for prose, narrative in beats:
+        parsed = json.dumps(
+            {"segments": [{"kind": "narration", "text": prose}]}
+        )
+        llm = _override_llm([parsed, narrative, _zero_state(), _zero_state()])
+        try:
+            response = app_state_setup.post(
+                "/chats/chat_bot_a/turns", data={"prose": prose}
+            )
+            assert response.status_code == 204
+        finally:
+            app.dependency_overrides.clear()
+        # The turn must have consumed its whole canned queue.
+        assert llm._canned == []
+
+    query = (
+        "SELECT payload_json FROM event_log "
+        "WHERE kind = 'assistant_turn' ORDER BY id"
+    )
+    with open_db(db_file) as conn:
+        turns = [json.loads(raw) for (raw,) in conn.execute(query).fetchall()]
+
+    assert len(turns) == 2
+    # Host first, then the guest.
+    for turn, expected_speaker in zip(turns, ("bot_a", "bot_b")):
+        assert turn["speaker_id"] == expected_speaker
+        # Tagged with this scene, not some other turn on the chat.
+        assert turn["meanwhile_scene_id"] == 2
+        # Discriminator read by downstream filters.
+        assert turn["present_set_kind"] == "host_guest"
+
+
+def test_meanwhile_scene_close_writes_per_pov_for_both_bots_only(
+    app_state_setup, tmp_path
+):
+    """When a meanwhile scene closes, per-POV summary rewrites land for
+    the host and the guest. No write fires for "you" — there is no
+    "you" memory store and no "you" POV in the meanwhile present set.
+
+    Steps: run one meanwhile turn so both bots own a memory row for
+    scene 2, append ``meanwhile_scene_closed``, run
+    ``apply_scene_close_summary`` with a two-reply mock, then inspect
+    the ``manual_edit`` events and the ``memories`` table.
+    """
+    import asyncio as _asyncio
+
+    from chat.services.scene_summarize import apply_scene_close_summary
+    from chat.eventlog.log import append_and_apply
+
+    _seed_meanwhile_chat(tmp_path / "test.db")
+
+    # Run a meanwhile turn first so each bot has a memory row scoped to
+    # the meanwhile scene_id (=2). The per-POV rewrite targets these
+    # rows by ``scene_id``.
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "narration", "text": "they speak quietly"}]}
+    )
+    canned = [
+        canned_parse,
+        "BotA speaks. *quietly*",
+        _zero_state(),
+        _zero_state(),
+    ]
+    mock = _override_llm(canned)
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns",
+            data={"prose": "they speak quietly"},
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+    assert mock._canned == []
+
+    # Close the meanwhile scene and run the close-summary pipeline.
+    # Two POV summaries (host + guest) — no "you" POV.
+    pov_payload_host = json.dumps(
+        {
+            "summary": "BotA reflects on the quiet moment with BotB.",
+            "knowledge_facts": [],
+            "relationship_summary": "",
+        }
+    )
+    pov_payload_guest = json.dumps(
+        {
+            "summary": "BotB notices BotA's reserved manner.",
+            "knowledge_facts": [],
+            "relationship_summary": "",
+        }
+    )
+    close_mock = MockLLMClient(canned=[pov_payload_host, pov_payload_guest])
+
+    with open_db(tmp_path / "test.db") as conn:
+        # Mark the meanwhile scene closed via the projector handler.
+        append_and_apply(
+            conn,
+            kind="meanwhile_scene_closed",
+            payload={
+                "scene_id": 2,
+                "closed_at": "2026-04-26T20:30:00+00:00",
+            },
+        )
+
+        # asyncio.run() can't nest under TestClient's loop, but the
+        # close pipeline is awaitable — drive it via a fresh loop here.
+        # The loop is created directly before the ``try`` so that every
+        # path on which it exists also reaches ``_loop.close()``; creating
+        # it earlier would leak it if ``append_and_apply`` raised.
+        _loop = _asyncio.new_event_loop()
+
+        # apply_scene_close_summary takes host_bot_id; here we tell it to
+        # operate on the meanwhile scene id (2). With no "you" memory
+        # row to rewrite (witness_you=0 means "you" doesn't have a
+        # memory for this scene), the call must produce per-POV writes
+        # ONLY for bot_a and bot_b.
+        try:
+            _loop.run_until_complete(
+                apply_scene_close_summary(
+                    conn,
+                    close_mock,
+                    classifier_model="x",
+                    chat_id="chat_bot_a",
+                    scene_id=2,
+                    host_bot_id="bot_a",
+                )
+            )
+        finally:
+            _loop.close()
+
+        # Per-POV memory rewrites: count manual_edits with target_kind
+        # ``memory_pov_summary`` whose target_id maps to a memory row
+        # scoped to scene 2.
+        edits = conn.execute(
+            "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'"
+        ).fetchall()
+        pov_edits = []
+        for (raw,) in edits:
+            payload = json.loads(raw)
+            if payload.get("target_kind") != "memory_pov_summary":
+                continue
+            mem_row = conn.execute(
+                "SELECT owner_id, scene_id FROM memories WHERE id = ?",
+                (payload["target_id"],),
+            ).fetchone()
+            # Skip edits whose memory row is gone or belongs to another
+            # scene.
+            if mem_row is None or mem_row[1] != 2:
+                continue
+            pov_edits.append({"owner": mem_row[0], "new": payload["new_value"]})
+
+        # Verify the actual current pov_summary on each bot's memory row
+        # for scene 2 reflects the rewrite.
+        host_pov = conn.execute(
+            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = ?",
+            ("bot_a", 2),
+        ).fetchone()
+        guest_pov = conn.execute(
+            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = ?",
+            ("bot_b", 2),
+        ).fetchone()
+        # No "you" memory row should exist for the meanwhile scene —
+        # "you" was never a witness.
+        you_row = conn.execute(
+            "SELECT id FROM memories WHERE owner_id = 'you' AND scene_id = ?",
+            (2,),
+        ).fetchone()
+
+    # Exactly two memory_pov_summary rewrites — one per bot witness.
+    assert len(pov_edits) == 2
+    owners = sorted(e["owner"] for e in pov_edits)
+    assert owners == ["bot_a", "bot_b"]
+    assert host_pov is not None and "BotA reflects" in host_pov[0]
+    assert guest_pov is not None and "BotB notices" in guest_pov[0]
+    # No "you" POV row — meanwhile scenes don't surface a you-memory.
+    assert you_row is None
+
+
+def test_meanwhile_turn_registered_in_in_flight_tasks(
+    app_state_setup, tmp_path
+):
+    """A meanwhile turn registers its streaming task in the chat-keyed
+    ``_in_flight_tasks`` registry the cancel route reads from, and clears
+    the entry after the stream completes.
+
+    Without registration, ``POST /chats/<id>/turns/cancel`` would be a
+    silent no-op for meanwhile beats — the Stop button wouldn't actually
+    stop them. We pin the behaviour via a streaming mock that snapshots
+    ``_in_flight_tasks`` at the moment of its first yield (mid-flight),
+    then assert the entry is removed after the response returns.
+    """
+    # Imported locally: these names are used by this test only.
+    from typing import AsyncIterator, Sequence
+
+    from chat.llm.client import Message
+    from chat.web.turns import _in_flight_tasks
+
+    _seed_meanwhile_chat(tmp_path / "test.db")
+
+    # Snapshot of (chat_id-present?, registered task object) captured
+    # at the first stream yield. The closure runs inside the streaming
+    # coroutine, so when it executes the task is alive and registered.
+    in_flight_snapshot: dict = {}
+
+    class _SnapshotMock(MockLLMClient):
+        # Replaces ``stream`` only; everything else is inherited from
+        # MockLLMClient.
+        async def stream(
+            self, messages: Sequence[Message], *, model: str, **params
+        ) -> AsyncIterator[str]:
+            # Take the next canned reply off the front of the queue and
+            # emit it one character at a time.
+            text = self._canned.pop(0)
+            for i, ch in enumerate(text):
+                if i == 0:
+                    # Snapshot at first yield — the post_turn coroutine
+                    # is awaiting our generator and the streaming Task
+                    # is registered in _in_flight_tasks[chat_id].
+                    in_flight_snapshot["present"] = (
+                        "chat_bot_a" in _in_flight_tasks
+                    )
+                    in_flight_snapshot["task"] = _in_flight_tasks.get(
+                        "chat_bot_a"
+                    )
+                yield ch
+
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "narration", "text": "they exchange a glance"}]}
+    )
+    # Same four-reply queue shape the other tests in this module use:
+    # parse result, narrative text, then two state updates.
+    mock = _SnapshotMock(
+        canned=[
+            canned_parse,
+            "BotA leans in. *quietly*",
+            _zero_state(),
+            _zero_state(),
+        ]
+    )
+    from chat.web.kickoff import get_llm_client
+
+    # Installed by hand rather than through _override_llm, which would
+    # construct a plain MockLLMClient instead of the snapshotting one.
+    app.dependency_overrides[get_llm_client] = lambda: mock
+    try:
+        # Pre-condition: registry is empty for this chat.
+        assert "chat_bot_a" not in _in_flight_tasks
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns",
+            data={"prose": "they exchange a glance"},
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+
+    # Mid-flight: the streaming task was present in the registry, and
+    # the captured value was an asyncio.Task (not None / not some other
+    # placeholder).
+    import asyncio
+
+    assert in_flight_snapshot.get("present") is True, (
+        "_in_flight_tasks was empty at first yield — meanwhile stream "
+        "isn't registering its task"
+    )
+    assert isinstance(in_flight_snapshot.get("task"), asyncio.Task)
+    # Post-flight: the entry has been cleaned up so the next turn (or
+    # the cancel route) doesn't see a stale task.
+    assert "chat_bot_a" not in _in_flight_tasks