merge: T85 JSON-build audit + meanwhile cancel route-level test

This commit is contained in:
Joseph Doherty
2026-04-26 22:42:40 -04:00
+170
View File
@@ -570,3 +570,173 @@ def test_meanwhile_turn_registered_in_in_flight_tasks(
# Post-flight: the entry has been cleaned up so the next turn (or
# the cancel route) doesn't see a stale task.
assert "chat_bot_a" not in _in_flight_tasks
def test_meanwhile_turn_cancellation_via_route(app_state_setup, tmp_path):
"""T85.2: a cancellation that fires while a meanwhile beat is
streaming truncates the assistant_turn and skips the post-turn
memory + state-update writes — the same end-to-end shape the
/turns/cancel route produces.
Drives the cancel by hijacking ``client.stream`` to raise
CancelledError on its first iteration — the exact pattern proven
by ``test_cancelled_turn_still_closes_scene_when_user_prose_signals_close``
in ``tests/test_turn_flow.py``. This mirrors what
``cancel_turn`` does in production (``task.cancel()`` schedules a
CancelledError on the next await); doing the raise inline avoids
the TestClient-loop-reentry problem that prevents driving a second
POST mid-stream from the same synchronous test thread, while
exercising the same code path: the meanwhile streamer's
``except asyncio.CancelledError`` block at meanwhile.py:276 sets
``cancelled=True`` + ``truncated=True``, the assistant_turn lands
with the partial, and the memory/state-update branch is skipped.
The ``_in_flight_tasks`` registration that wires the cancel route
to the meanwhile streamer is independently pinned by
``test_meanwhile_turn_registered_in_in_flight_tasks`` above; this
test pins the downstream behavioural shape the registration
enables — together they cover the full Stop-button lifecycle for
meanwhile beats.
Behavioural pins:
* ``assistant_turn`` lands with ``truncated=True``,
``meanwhile_scene_id=2``, ``speaker_id="bot_a"``.
* No ``memory_written`` events fire (cancel skips per-bot writes).
* No post-turn ``edge_update`` events fire (cancel skips state updates).
* ``_in_flight_tasks`` is empty post-flight.
"""
from typing import AsyncIterator, Sequence
from chat.llm.client import Message
from chat.web.turns import _in_flight_tasks
_seed_meanwhile_chat(tmp_path / "test.db")
class _CancelOnStreamMock(MockLLMClient):
"""Yields CancelledError on first iteration of ``stream`` —
simulates ``cancel_turn`` having fired ``task.cancel()`` on the
in-flight streaming task. ``generate`` is delegated to the
canned-queue base so parse_turn still resolves cleanly.
"""
async def stream(
self, messages: Sequence[Message], *, model: str, **params
) -> AsyncIterator[str]:
raise asyncio.CancelledError
yield # pragma: no cover — keeps this an async generator.
canned_parse = json.dumps(
{"segments": [{"kind": "narration", "text": "they exchange a glance"}]}
)
# Canned queue: only parse_turn — the narrative slot is never pulled
# because stream raises before consuming it, and post-turn
# state-update is skipped by the cancel branch.
mock = _CancelOnStreamMock(canned=[canned_parse])
from chat.web.kickoff import get_llm_client
app.dependency_overrides[get_llm_client] = lambda: mock
try:
# The meanwhile controller re-raises CancelledError after the
# partial assistant_turn is recorded (meanwhile.py:387). The
# outer post_turn route has no catch for CancelledError on the
# meanwhile path (turns.py:244-254 only catches ValueError), so
# the exception propagates up through Starlette. TestClient
# surfaces that as a 500 or a propagated exception depending on
# Starlette/asyncio versions; we don't pin the response.
try:
app_state_setup.post(
"/chats/chat_bot_a/turns",
data={"prose": "they exchange a glance"},
)
except BaseException:
pass
finally:
app.dependency_overrides.clear()
with open_db(tmp_path / "test.db") as conn:
assistant_rows = conn.execute(
"SELECT payload_json FROM event_log "
"WHERE kind = 'assistant_turn' ORDER BY id"
).fetchall()
memory_count = conn.execute(
"SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
).fetchone()[0]
# Edge updates AFTER the assistant_turn (i.e. excluding seeded ones).
max_at_row = conn.execute(
"SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
).fetchone()
max_at = max_at_row[0] if max_at_row[0] is not None else 0
post_turn_edge_updates = conn.execute(
"SELECT COUNT(*) FROM event_log "
"WHERE kind = 'edge_update' AND id > ?",
(max_at,),
).fetchone()[0]
# The cancelled assistant_turn was still recorded with truncated=True,
# carrying whatever partial text accumulated before cancel propagated
# (zero text here since the cancel hits on the first iteration).
assert len(assistant_rows) == 1
payload = json.loads(assistant_rows[0][0])
assert payload["truncated"] is True, payload
assert payload["meanwhile_scene_id"] == 2
assert payload["speaker_id"] == "bot_a"
# No per-bot memory writes — cancellation short-circuits the memory
# + state-update branch (see chat/web/meanwhile.py:308).
assert memory_count == 0
# No post-turn edge_updates — same short-circuit.
assert post_turn_edge_updates == 0
# Post-flight: registry cleared so the cancel route won't try to
# re-cancel a defunct task on a follow-up POST.
assert "chat_bot_a" not in _in_flight_tasks
def test_meanwhile_cancel_route_no_op_after_turn_completes(
app_state_setup, tmp_path
):
"""T85.2: POST ``/chats/<id>/turns/cancel`` AFTER a meanwhile turn
has fully completed is a silent 204 no-op — there is no in-flight
task to cancel, the registry is empty, and the route must not error.
Pins the cancel endpoint's robustness against the common-but-racy
sequence where the user clicks Stop just after the stream finished
(the SSE channel hasn't yet flipped the client-side ``isStreaming``
flag). This is a complement to the snapshot test: the snapshot test
pins that the registry IS populated mid-flight, this test pins that
it isn't AFTER and that the route copes gracefully.
"""
from chat.web.turns import _in_flight_tasks
_seed_meanwhile_chat(tmp_path / "test.db")
canned_parse = json.dumps(
{"segments": [{"kind": "narration", "text": "they exchange a glance"}]}
)
canned = [
canned_parse,
"BotA leans in. *quietly*",
_zero_state(),
_zero_state(),
]
mock = _override_llm(canned)
try:
response = app_state_setup.post(
"/chats/chat_bot_a/turns",
data={"prose": "they exchange a glance"},
)
assert response.status_code == 204
finally:
app.dependency_overrides.clear()
assert mock._canned == []
# Registry was cleaned up after the stream completed.
assert "chat_bot_a" not in _in_flight_tasks
# Cancel after-the-fact: 204, no error, registry stays empty.
cancel_response = app_state_setup.post(
"/chats/chat_bot_a/turns/cancel"
)
assert cancel_response.status_code == 204
assert "chat_bot_a" not in _in_flight_tasks