# chat/tests/test_meanwhile_turn_flow.py

"""Meanwhile-mode turn flow (T64).

A meanwhile scene runs entirely between two bots — host + guest — with
"you" absent. The user manually advances the scene by POSTing prose to
the existing ``/chats/<id>/turns`` endpoint; the route detects the active
meanwhile scene at the start of ``post_turn`` and dispatches to the
``process_meanwhile_turn`` controller in ``chat/web/meanwhile.py``.

Coverage:

1. Memory writes for a meanwhile turn carry witness ``[you=0, host=1,
   guest=1]`` for both the host's and the guest's per-POV memory rows.
2. State updates after a meanwhile turn run for exactly 2 directed pairs
   (host -> guest, guest -> host) — no you-related pairs fire.
3. Speakers alternate across consecutive meanwhile turns: the host
   speaks first (no prior meanwhile assistant_turn), the guest speaks
   second (the prior turn's speaker was the host, so this turn's
   speaker is the OTHER bot).
4. Scene-close on a meanwhile scene writes per-POV summaries for host +
   guest only — no "you" POV row is written, mirroring the no-you
   present_set of the meanwhile scene.
"""

from __future__ import annotations

import json
from pathlib import Path

import pytest
from fastapi.testclient import TestClient

from chat.app import app
from chat.db.connection import open_db
from chat.eventlog.log import append_event
from chat.eventlog.projector import project
from chat.llm.mock import MockLLMClient
import chat.state.meanwhile  # noqa: F401  (registers handlers)


def _bot_payload(bot_id: str, name: str) -> dict:
    return {
        "id": bot_id,
        "name": name,
        "persona": f"persona for {name}",
        "voice_samples": [],
        "traits": [],
        "backstory": "",
        "initial_relationship_to_you": "",
        "kickoff_prose": "...",
    }


def _seed_meanwhile_chat(db_path: Path) -> None:
    """Populate ``db_path`` with a chat that has an active meanwhile scene.

    Events are appended in this order, then projected:

    * ``bot_authored`` for ``bot_a`` (BotA) and ``bot_b`` (BotB);
    * ``you_authored`` for "Me";
    * ``chat_created`` for ``chat_bot_a`` (host ``bot_a``, guest ``bot_b``);
    * ``container_created`` for the "office" workplace;
    * ``scene_opened`` — the parent you-scene with all three participants;
    * ``meanwhile_scene_started`` — child scene 2 (parent scene 1) holding
      only the two bots;
    * one ``edge_update`` per directed bot pair (seeded at the
      schema-default 50/50) so post-turn state-update writes land on
      initialized rows;
    * one ``activity_change`` per bot so the prompt assembler has
      something to render.
    """
    chat_id = "chat_bot_a"
    opened_at = "2026-04-26T20:00:00+00:00"

    with open_db(db_path) as conn:

        def emit(kind: str, payload: dict) -> None:
            # Thin wrapper so each event below reads as (kind, payload).
            append_event(conn, kind=kind, payload=payload)

        for bot_id, bot_name in (("bot_a", "BotA"), ("bot_b", "BotB")):
            emit("bot_authored", _bot_payload(bot_id, bot_name))

        emit(
            "you_authored",
            {"name": "Me", "pronouns": "they/them", "persona": ""},
        )
        emit(
            "chat_created",
            {
                "id": chat_id,
                "host_bot_id": "bot_a",
                "guest_bot_id": "bot_b",
                "initial_time": opened_at,
                "narrative_anchor": "Day 1",
                "weather": "",
            },
        )
        emit(
            "container_created",
            {
                "chat_id": chat_id,
                "name": "office",
                "type": "workplace",
                "properties": {},
            },
        )

        # The parent you-scene must exist before the meanwhile child.
        emit(
            "scene_opened",
            {
                "chat_id": chat_id,
                "container_id": 1,
                "started_at": opened_at,
                "participants": ["you", "bot_a", "bot_b"],
            },
        )

        # Child meanwhile scene: the two bots only, linked to the parent.
        emit(
            "meanwhile_scene_started",
            {
                "scene_id": 2,
                "chat_id": chat_id,
                "parent_scene_id": 1,
                "host_bot_id": "bot_a",
                "guest_bot_id": "bot_b",
                "started_at": "2026-04-26T20:05:00+00:00",
            },
        )

        # Initialize both directed bot<->bot edges.
        for source, target in (("bot_a", "bot_b"), ("bot_b", "bot_a")):
            emit(
                "edge_update",
                {
                    "source_id": source,
                    "target_id": target,
                    "chat_id": chat_id,
                    "knowledge_facts": [],
                },
            )

        # Give each bot a current activity.
        for entity, verb in (("bot_a", "listening"), ("bot_b", "talking")):
            action = {
                "verb": verb,
                "interruptible": True,
                "required_attention": "low",
                "expected_duration": "ongoing",
            }
            emit(
                "activity_change",
                {
                    "entity_id": entity,
                    "posture": "sitting",
                    "action": action,
                    "attention": "",
                    "holding": [],
                    "status": {},
                },
            )

        project(conn)


def _override_llm(canned: list[str]) -> MockLLMClient:
    """Install a ``MockLLMClient`` as the app's LLM dependency.

    The mock is built from a copy of ``canned`` and registered in
    ``app.dependency_overrides`` under ``get_llm_client``. The same mock
    instance is returned so callers can inspect its remaining queue.
    """
    from chat.web.kickoff import get_llm_client

    stub = MockLLMClient(canned=[*canned])
    app.dependency_overrides[get_llm_client] = lambda: stub
    return stub


def _zero_state() -> str:
    return json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )


@pytest.fixture
def app_state_setup(tmp_path, monkeypatch):
    """Yield a ``TestClient`` for the app wired to a per-test config and DB.

    Writes a one-line config file under ``tmp_path``, points
    ``CHAT_CONFIG_PATH`` and ``CHAT_DB_PATH`` at files in ``tmp_path``,
    and sets the background worker's ``enabled`` flag to False while the
    client is open. Dependency overrides are cleared on teardown.
    """
    config_file = tmp_path / "config.toml"
    config_file.write_text('featherless_api_key = "test"\n')
    db_file = tmp_path / "test.db"

    env = (("CHAT_CONFIG_PATH", config_file), ("CHAT_DB_PATH", db_file))
    for variable, target in env:
        monkeypatch.setenv(variable, str(target))

    with TestClient(app) as client:
        app.state.background_worker.enabled = False
        yield client
    app.dependency_overrides.clear()


def test_meanwhile_turn_writes_memories_with_witness_0_1_1(
    app_state_setup, tmp_path
):
    """One meanwhile turn yields exactly two ``memory_written`` events —
    one owned by the host, one by the guest — each flagged
    ``witness_you=0, witness_host=1, witness_guest=1`` because "you" is
    absent from the scene.

    The mock LLM queue holds four replies, consumed in order:
      1. parse_turn (classification of the user prose)
      2. the narrative stream (host speaks; no prior meanwhile turn)
      3. state-update bot_a -> bot_b
      4. state-update bot_b -> bot_a
    """
    db_file = tmp_path / "test.db"
    _seed_meanwhile_chat(db_file)

    prose = "they exchange a glance"
    llm = _override_llm(
        [
            json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
            "BotA leans in. *quietly* Tell me what you saw.",
            _zero_state(),
            _zero_state(),
        ]
    )
    try:
        reply = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": prose}
        )
        assert reply.status_code == 204
    finally:
        app.dependency_overrides.clear()
    # Every canned reply must have been consumed.
    assert llm._canned == []

    with open_db(db_file) as conn:
        cursor = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'memory_written' ORDER BY id"
        )
        written = [json.loads(row[0]) for row in cursor.fetchall()]

    assert len(written) == 2
    assert sorted(entry["owner_id"] for entry in written) == ["bot_a", "bot_b"]
    expected_flags = (("witness_you", 0), ("witness_host", 1), ("witness_guest", 1))
    for entry in written:
        for flag, value in expected_flags:
            assert entry[flag] == value, entry


def test_meanwhile_turn_emits_2_edge_updates_only(app_state_setup, tmp_path):
    """After a meanwhile turn, state-update runs for exactly the two
    directed bot pairs (host -> guest, guest -> host); nothing involving
    "you" is emitted.
    """
    db_file = tmp_path / "test.db"
    _seed_meanwhile_chat(db_file)

    prose = "they whisper"
    llm = _override_llm(
        [
            json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
            "BotA whispers. *softly* I noticed something today.",
            _zero_state(),
            _zero_state(),
        ]
    )
    try:
        reply = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": prose}
        )
        assert reply.status_code == 204
    finally:
        app.dependency_overrides.clear()
    assert llm._canned == []

    with open_db(db_file) as conn:
        # Only edge updates logged after the assistant_turn count; the
        # seeded ones were appended before the request.
        last_turn_id = conn.execute(
            "SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()[0]
        cursor = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'edge_update' AND id > ? ORDER BY id",
            (last_turn_id,),
        )
        later_updates = [json.loads(row[0]) for row in cursor.fetchall()]

    # Two post-turn updates, one per direction.
    assert len(later_updates) == 2
    directed = sorted(
        (update["source_id"], update["target_id"]) for update in later_updates
    )
    assert directed == [("bot_a", "bot_b"), ("bot_b", "bot_a")]
    # Neither endpoint of any update may be "you".
    for update in later_updates:
        for endpoint in ("source_id", "target_id"):
            assert update[endpoint] != "you", update


def test_meanwhile_turn_alternates_speaker(app_state_setup, tmp_path):
    """Consecutive meanwhile turns switch speakers.

    With no earlier meanwhile ``assistant_turn`` for the scene, the host
    speaks on the first turn. On the second turn the previous speaker
    was the host, so the guest speaks. Every ``assistant_turn`` payload
    is tagged with ``meanwhile_scene_id`` so the lookup of the previous
    speaker is scoped to this scene.
    """
    db_file = tmp_path / "test.db"
    _seed_meanwhile_chat(db_file)

    # (user prose, canned narrative) for each of the two turns, in order.
    beats = (
        ("they pause", "BotA speaks first. *quietly*"),
        ("and again", "BotB replies. *thoughtfully*"),
    )
    for prose, narrative in beats:
        llm = _override_llm(
            [
                json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
                narrative,
                _zero_state(),
                _zero_state(),
            ]
        )
        try:
            reply = app_state_setup.post(
                "/chats/chat_bot_a/turns", data={"prose": prose}
            )
            assert reply.status_code == 204
        finally:
            app.dependency_overrides.clear()
        assert llm._canned == []

    with open_db(db_file) as conn:
        cursor = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' ORDER BY id"
        )
        turns = [json.loads(row[0]) for row in cursor.fetchall()]

    assert len(turns) == 2
    first, second = turns
    # Host opens, guest answers.
    assert first["speaker_id"] == "bot_a"
    assert second["speaker_id"] == "bot_b"
    # Each turn is tagged with the meanwhile scene id, so the alternation
    # lookup ignores any other assistant_turn on the chat.
    assert first["meanwhile_scene_id"] == 2
    assert second["meanwhile_scene_id"] == 2
    # Each turn also carries the present_set_kind discriminator used by
    # downstream filters (digest creation, drawer rendering).
    assert first["present_set_kind"] == "host_guest"
    assert second["present_set_kind"] == "host_guest"


def test_meanwhile_scene_close_writes_per_pov_for_both_bots_only(
    app_state_setup, tmp_path
):
    """When a meanwhile scene closes, per-POV summary rewrites land for
    the host and the guest. No write fires for "you" — there is no
    "you" memory store and no "you" POV in the meanwhile present set.

    Flow: run one meanwhile turn (so each bot owns a memory row for
    scene 2), append ``meanwhile_scene_closed``, drive
    ``apply_scene_close_summary`` on a private event loop, then inspect
    the ``manual_edit`` events and the ``memories`` table.
    """
    import asyncio as _asyncio

    from chat.services.scene_summarize import apply_scene_close_summary
    from chat.eventlog.log import append_and_apply

    _seed_meanwhile_chat(tmp_path / "test.db")

    # Run a meanwhile turn first so each bot has a memory row scoped to
    # the meanwhile scene_id (=2). The per-POV rewrite targets these
    # rows by ``scene_id``.
    canned_parse = json.dumps(
        {"segments": [{"kind": "narration", "text": "they speak quietly"}]}
    )
    canned = [
        canned_parse,
        "BotA speaks. *quietly*",
        _zero_state(),
        _zero_state(),
    ]
    mock = _override_llm(canned)
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns",
            data={"prose": "they speak quietly"},
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()
    assert mock._canned == []

    # Close the meanwhile scene and run the close-summary pipeline.
    # Two POV summaries (host + guest) — no "you" POV.
    pov_payload_host = json.dumps(
        {
            "summary": "BotA reflects on the quiet moment with BotB.",
            "knowledge_facts": [],
            "relationship_summary": "",
        }
    )
    pov_payload_guest = json.dumps(
        {
            "summary": "BotB notices BotA's reserved manner.",
            "knowledge_facts": [],
            "relationship_summary": "",
        }
    )
    # T65 added a meanwhile digest summarize call after per-POV writes
    # for meanwhile scenes. T58's thread detection is wrapped in try/except
    # so its IndexError is swallowed gracefully.
    digest_payload = json.dumps(
        {
            "summary": "While you were away, BotA and BotB talked quietly.",
            "knowledge_facts": [],
            "relationship_summary": "",
        }
    )
    close_mock = MockLLMClient(
        canned=[pov_payload_host, pov_payload_guest, digest_payload]
    )

    with open_db(tmp_path / "test.db") as conn:
        # Mark the meanwhile scene closed via the projector handler.
        append_and_apply(
            conn,
            kind="meanwhile_scene_closed",
            payload={
                "scene_id": 2,
                "closed_at": "2026-04-26T20:30:00+00:00",
            },
        )

        # apply_scene_close_summary takes host_bot_id; here we tell it to
        # operate on the meanwhile scene id (2). With no "you" memory
        # row to rewrite (witness_you=0 means "you" doesn't have a
        # memory for this scene), the call must produce per-POV writes
        # ONLY for bot_a and bot_b.
        #
        # asyncio.run() can't nest under TestClient's loop, but the
        # close pipeline is awaitable — drive it via a fresh loop here.
        # The loop is created immediately before the try/finally that
        # closes it, so no earlier failure can leave it open.
        _loop = _asyncio.new_event_loop()
        try:
            _loop.run_until_complete(
                apply_scene_close_summary(
                    conn,
                    close_mock,
                    classifier_model="x",
                    chat_id="chat_bot_a",
                    scene_id=2,
                    host_bot_id="bot_a",
                )
            )
        finally:
            _loop.close()

        # Per-POV memory rewrites: count manual_edits with target_kind
        # ``memory_pov_summary`` whose target_id maps to a memory row
        # scoped to scene 2.
        edits = conn.execute(
            "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'"
        ).fetchall()
        pov_edits = []
        for (raw,) in edits:
            payload = json.loads(raw)
            if payload.get("target_kind") != "memory_pov_summary":
                continue
            mem_row = conn.execute(
                "SELECT owner_id, scene_id FROM memories WHERE id = ?",
                (payload["target_id"],),
            ).fetchone()
            # Skip edits whose memory row is missing or belongs to a
            # different scene.
            if mem_row is None or mem_row[1] != 2:
                continue
            pov_edits.append({"owner": mem_row[0], "new": payload["new_value"]})

        # Verify the actual current pov_summary on each bot's memory row
        # for scene 2 reflects the rewrite.
        host_pov = conn.execute(
            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = ?",
            ("bot_a", 2),
        ).fetchone()
        guest_pov = conn.execute(
            "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = ?",
            ("bot_b", 2),
        ).fetchone()
        # No "you" memory row should exist for the meanwhile scene —
        # "you" was never a witness.
        you_row = conn.execute(
            "SELECT id FROM memories WHERE owner_id = 'you' AND scene_id = ?",
            (2,),
        ).fetchone()

    # Exactly two memory_pov_summary rewrites — one per bot witness.
    assert len(pov_edits) == 2
    owners = sorted(e["owner"] for e in pov_edits)
    assert owners == ["bot_a", "bot_b"]
    assert host_pov is not None and "BotA reflects" in host_pov[0]
    assert guest_pov is not None and "BotB notices" in guest_pov[0]
    # No "you" POV row — meanwhile scenes don't surface a you-memory.
    assert you_row is None


def test_meanwhile_turn_registered_in_in_flight_tasks(
    app_state_setup, tmp_path
):
    """The streaming task of a meanwhile turn is registered under its
    chat id in ``_in_flight_tasks`` (the registry the cancel route
    reads) while the stream runs, and the entry is gone once the
    response has returned.

    If the task were not registered, ``POST /chats/<id>/turns/cancel``
    would silently do nothing for meanwhile beats and the Stop button
    would not stop them. A streaming mock records the registry state
    at its first yield (mid-flight); the test then checks the entry
    has been removed after the request completes.
    """
    import asyncio
    from typing import AsyncIterator, Sequence

    from chat.llm.client import Message
    from chat.web.kickoff import get_llm_client
    from chat.web.turns import _in_flight_tasks

    chat_id = "chat_bot_a"
    _seed_meanwhile_chat(tmp_path / "test.db")

    # Filled in by the mock at its first yield: whether the chat id was
    # in the registry, and which object was stored for it.
    captured: dict = {}

    class _SnapshotMock(MockLLMClient):
        async def stream(
            self, messages: Sequence[Message], *, model: str, **params
        ) -> AsyncIterator[str]:
            # Pop the next canned reply and emit it one character at a
            # time, recording the registry state just before the first
            # character is yielded.
            reply = self._canned.pop(0)
            pending_snapshot = True
            for char in reply:
                if pending_snapshot:
                    pending_snapshot = False
                    captured["present"] = chat_id in _in_flight_tasks
                    captured["task"] = _in_flight_tasks.get(chat_id)
                yield char

    prose = "they exchange a glance"
    llm = _SnapshotMock(
        canned=[
            json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
            "BotA leans in. *quietly*",
            _zero_state(),
            _zero_state(),
        ]
    )

    app.dependency_overrides[get_llm_client] = lambda: llm
    try:
        # Nothing may be registered for this chat before the request.
        assert chat_id not in _in_flight_tasks
        reply = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": prose}
        )
        assert reply.status_code == 204
    finally:
        app.dependency_overrides.clear()

    # Mid-flight the registry held an entry for the chat, and that entry
    # was a real asyncio.Task rather than None or a placeholder.
    assert captured.get("present") is True, (
        "_in_flight_tasks was empty at first yield — meanwhile stream "
        "isn't registering its task"
    )
    assert isinstance(captured.get("task"), asyncio.Task)
    # Afterwards the entry is gone, so neither the next turn nor the
    # cancel route can see a stale task.
    assert chat_id not in _in_flight_tasks


def test_meanwhile_turn_cancellation_via_route(app_state_setup, tmp_path):
    """T85.2: a cancellation that fires while a meanwhile beat is
    streaming truncates the assistant_turn and skips the post-turn
    memory + state-update writes — the same end-to-end shape the
    /turns/cancel route produces.

    Drives the cancel by hijacking ``client.stream`` to raise
    CancelledError on its first iteration — the exact pattern proven
    by ``test_cancelled_turn_still_closes_scene_when_user_prose_signals_close``
    in ``tests/test_turn_flow.py``. This mirrors what
    ``cancel_turn`` does in production (``task.cancel()`` schedules a
    CancelledError on the next await); doing the raise inline avoids
    the TestClient-loop-reentry problem that prevents driving a second
    POST mid-stream from the same synchronous test thread, while
    exercising the same code path: the meanwhile streamer's
    ``except asyncio.CancelledError`` block at meanwhile.py:276 sets
    ``cancelled=True`` + ``truncated=True``, the assistant_turn lands
    with the partial, and the memory/state-update branch is skipped.

    The ``_in_flight_tasks`` registration that wires the cancel route
    to the meanwhile streamer is independently pinned by
    ``test_meanwhile_turn_registered_in_in_flight_tasks``; this
    test pins the downstream behavioural shape the registration
    enables — together they cover the full Stop-button lifecycle for
    meanwhile beats.

    Behavioural pins:

    * ``assistant_turn`` lands with ``truncated=True``,
      ``meanwhile_scene_id=2``, ``speaker_id="bot_a"``.
    * No ``memory_written`` events fire (cancel skips per-bot writes).
    * No post-turn ``edge_update`` events fire (cancel skips state updates).
    * ``_in_flight_tasks`` is empty post-flight.
    """
    # ``asyncio`` is not imported at module level. Binding it here lets
    # the nested mock's ``stream`` resolve ``asyncio.CancelledError``
    # through the enclosing scope instead of failing with NameError.
    import asyncio
    from typing import AsyncIterator, Sequence

    from chat.llm.client import Message
    from chat.web.turns import _in_flight_tasks

    _seed_meanwhile_chat(tmp_path / "test.db")

    class _CancelOnStreamMock(MockLLMClient):
        """Yields CancelledError on first iteration of ``stream`` —
        simulates ``cancel_turn`` having fired ``task.cancel()`` on the
        in-flight streaming task. ``generate`` is delegated to the
        canned-queue base so parse_turn still resolves cleanly.
        """

        async def stream(
            self, messages: Sequence[Message], *, model: str, **params
        ) -> AsyncIterator[str]:
            raise asyncio.CancelledError
            yield  # pragma: no cover — keeps this an async generator.

    canned_parse = json.dumps(
        {"segments": [{"kind": "narration", "text": "they exchange a glance"}]}
    )
    # Canned queue: only parse_turn — the narrative slot is never pulled
    # because stream raises before consuming it, and post-turn
    # state-update is skipped by the cancel branch.
    mock = _CancelOnStreamMock(canned=[canned_parse])
    from chat.web.kickoff import get_llm_client

    app.dependency_overrides[get_llm_client] = lambda: mock
    try:
        # The meanwhile controller re-raises CancelledError after the
        # partial assistant_turn is recorded (meanwhile.py:387). The
        # outer post_turn route has no catch for CancelledError on the
        # meanwhile path (turns.py:244-254 only catches ValueError), so
        # the exception propagates up through Starlette. TestClient
        # surfaces that as a 500 or a propagated exception depending on
        # Starlette/asyncio versions; we don't pin the response.
        try:
            app_state_setup.post(
                "/chats/chat_bot_a/turns",
                data={"prose": "they exchange a glance"},
            )
        except (KeyboardInterrupt, SystemExit):
            # Never swallow Ctrl-C or an interpreter exit request.
            raise
        except BaseException:
            # CancelledError derives from BaseException, so a plain
            # ``except Exception`` would not cover the propagated cancel.
            pass
    finally:
        app.dependency_overrides.clear()

    with open_db(tmp_path / "test.db") as conn:
        assistant_rows = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' ORDER BY id"
        ).fetchall()
        memory_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
        ).fetchone()[0]
        # Edge updates AFTER the assistant_turn (i.e. excluding seeded ones).
        max_at_row = conn.execute(
            "SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()
        # MAX(id) is NULL when no assistant_turn exists; fall back to 0 so
        # the comparison below stays well-defined.
        max_at = max_at_row[0] if max_at_row[0] is not None else 0
        post_turn_edge_updates = conn.execute(
            "SELECT COUNT(*) FROM event_log "
            "WHERE kind = 'edge_update' AND id > ?",
            (max_at,),
        ).fetchone()[0]

    # The cancelled assistant_turn was still recorded with truncated=True,
    # carrying whatever partial text accumulated before cancel propagated
    # (zero text here since the cancel hits on the first iteration).
    assert len(assistant_rows) == 1
    payload = json.loads(assistant_rows[0][0])
    assert payload["truncated"] is True, payload
    assert payload["meanwhile_scene_id"] == 2
    assert payload["speaker_id"] == "bot_a"

    # No per-bot memory writes — cancellation short-circuits the memory
    # + state-update branch (see chat/web/meanwhile.py:308).
    assert memory_count == 0

    # No post-turn edge_updates — same short-circuit.
    assert post_turn_edge_updates == 0

    # Post-flight: registry cleared so the cancel route won't try to
    # re-cancel a defunct task on a follow-up POST.
    assert "chat_bot_a" not in _in_flight_tasks


def test_meanwhile_cancel_route_no_op_after_turn_completes(
    app_state_setup, tmp_path
):
    """T85.2: hitting ``/chats/<id>/turns/cancel`` once a meanwhile turn
    has already finished returns 204 and changes nothing — no task is
    in flight, the registry holds no entry, and the route must not fail.

    This covers the racy-but-common case of the user pressing Stop just
    after the stream ended (before the SSE channel has flipped the
    client-side ``isStreaming`` flag). It complements the snapshot
    test: that one pins the registry being populated mid-flight, this
    one pins it being empty afterwards and the route coping with that.
    """
    from chat.web.turns import _in_flight_tasks

    chat_id = "chat_bot_a"
    _seed_meanwhile_chat(tmp_path / "test.db")

    prose = "they exchange a glance"
    llm = _override_llm(
        [
            json.dumps({"segments": [{"kind": "narration", "text": prose}]}),
            "BotA leans in. *quietly*",
            _zero_state(),
            _zero_state(),
        ]
    )
    try:
        turn_reply = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": prose}
        )
        assert turn_reply.status_code == 204
    finally:
        app.dependency_overrides.clear()
    assert llm._canned == []

    # The completed stream must have removed its registry entry.
    assert chat_id not in _in_flight_tasks

    # A late cancel answers 204 and leaves the registry untouched.
    cancel_reply = app_state_setup.post("/chats/chat_bot_a/turns/cancel")
    assert cancel_reply.status_code == 204
    assert chat_id not in _in_flight_tasks