Files
chat/tests/test_turn_flow.py
T
Joseph Doherty bfb2ffb6f6 chore: pin scene-close-on-cancel behavior + comment rationale (T74.3)
Phase 2 T44 review noted that scene close still runs when a primary
turn is cancelled mid-stream and asked the implementer to review.

Review finding: the existing behavior is correct, not a bug. The
close-detection branch in post_turn consumes ONLY the user's prose
(fully appended to the event_log BEFORE streaming starts) and the
current container name. It does NOT consume the bot's output. A user
who types "we're done here, fade out" and then hits Stop mid-stream
still meant to close — the cancelled bot beat doesn't invalidate
that intent.

- Document the rationale with an inline comment near the
  close-detection branch in chat/web/turns.py.
- Add regression test
  test_cancelled_turn_still_closes_scene_when_user_prose_signals_close
  that drives a stream raising CancelledError on first iteration and
  asserts the scene_closed event still lands.
2026-04-26 17:40:12 -04:00

899 lines
33 KiB
Python

"""End-to-end turn flow (T19): user POSTs prose, server parses, streams via SSE.
Covers:
- POST ``/chats/<id>/turns`` returns 404 when the chat doesn't exist.
- A successful POST appends both a ``user_turn`` and an ``assistant_turn``
event in chronological order. The assistant payload carries the full
streamed text and ``truncated=False``.
- After a turn lands, the chat detail GET renders the user prose and the
assistant text from the event log.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from chat.app import app
from chat.db.connection import open_db
from chat.eventlog.log import append_event
from chat.eventlog.projector import project
from chat.llm.mock import MockLLMClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a ``TestClient`` backed by a temp config/DB and a canned mock LLM.

    The mock is also attached to the client as ``mock_llm`` so tests can
    reach it. The dependency override is cleared on teardown, and also
    when the app fails to start, so it cannot leak into other tests.
    """
    cfg = tmp_path / "config.toml"
    cfg.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
    db = tmp_path / "test.db"
    monkeypatch.setenv("CHAT_DB_PATH", str(db))

    canned_parse = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "hello"}]}
    )
    canned_response = "Hi there."
    # Two state-update classifier calls fire after the assistant_turn
    # (one per directed edge: bot->you, you->bot). We feed them benign
    # zero-delta JSON so the existing assertions about ``user_turn`` /
    # ``assistant_turn`` are unaffected.
    canned_state_update = json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )
    # T26 scene-close detection runs after the state-update pass. These
    # tests don't seed an active scene so the classifier is short-circuited
    # in turns.py — but the canned slot is harmless to leave in place,
    # and adding it documents the order even when the call doesn't fire.
    canned_scene_close = json.dumps(
        {"should_close": False, "reason": "no signal"}
    )

    # Import here so env vars are visible to the dependency lookup.
    from chat.web.kickoff import get_llm_client

    mock = MockLLMClient(
        canned=[
            canned_parse,
            canned_response,
            canned_state_update,
            canned_state_update,
            canned_scene_close,
        ]
    )
    app.dependency_overrides[get_llm_client] = lambda: mock
    # try/finally: the override is installed before the app starts, so it
    # must be removed even if TestClient startup or shutdown raises.
    try:
        with TestClient(app) as c:
            # Disable the lifespan-managed background worker — it would
            # otherwise try to score significance through Featherless with
            # a fake test API key. Worker behavior is exercised directly in
            # tests/test_significance.py with a mock LLM factory.
            app.state.background_worker.enabled = False
            c.mock_llm = mock  # type: ignore[attr-defined]
            yield c
    finally:
        app.dependency_overrides.clear()
def _seed(db_path: Path) -> None:
    """Write the minimal single-bot fixture into the event log.

    Appends, in order: the bot, the chat, one ``bot_a -> you`` edge, and
    an activity row for each speaker, then projects the log.
    """
    bot_event = {
        "id": "bot_a",
        "name": "BotA",
        "persona": "thoughtful, observant",
        "voice_samples": [],
        "traits": [],
        "backstory": "",
        "initial_relationship_to_you": "",
        "kickoff_prose": "...",
    }
    chat_event = {
        "id": "chat_bot_a",
        "host_bot_id": "bot_a",
        "initial_time": "2026-04-26T20:00:00+00:00",
        "narrative_anchor": "Day 1",
        "weather": "",
    }
    edge_event = {
        "source_id": "bot_a",
        "target_id": "you",
        "chat_id": "chat_bot_a",
        "knowledge_facts": ["coworker"],
    }
    speaker_verbs = (("you", "talking"), ("bot_a", "listening"))

    with open_db(db_path) as conn:
        append_event(conn, kind="bot_authored", payload=bot_event)
        append_event(conn, kind="chat_created", payload=chat_event)
        # The edge gives the prompt assembler something to render.
        append_event(conn, kind="edge_update", payload=edge_event)
        # The prompt assembler requires an activity row per speaker.
        for entity_id, verb in speaker_verbs:
            append_event(
                conn,
                kind="activity_change",
                payload={
                    "entity_id": entity_id,
                    "posture": "sitting",
                    "action": {
                        "verb": verb,
                        "interruptible": True,
                        "required_attention": "low",
                        "expected_duration": "ongoing",
                    },
                    "attention": "",
                    "holding": [],
                    "status": {},
                },
            )
        project(conn)
def test_post_turn_404_when_chat_missing(client):
    """Posting a turn to a chat id that was never created returns 404."""
    resp = client.post("/chats/no_such/turns", data={"prose": "hello"})
    assert resp.status_code == 404
def test_post_turn_appends_user_and_assistant_events(client, tmp_path):
    """A successful POST logs a ``user_turn`` then an ``assistant_turn``."""
    db_path = tmp_path / "test.db"
    _seed(db_path)

    resp = client.post("/chats/chat_bot_a/turns", data={"prose": "hello"})
    assert resp.status_code == 204

    with open_db(db_path) as conn:
        turn_rows = conn.execute(
            "SELECT kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'assistant_turn') ORDER BY id"
        ).fetchall()

    # Exactly two turn events, user first.
    assert [row[0] for row in turn_rows] == ["user_turn", "assistant_turn"]

    user_event = json.loads(turn_rows[0][1])
    assert user_event["chat_id"] == "chat_bot_a"
    assert user_event["prose"] == "hello"
    # Segments come from the canned classifier output.
    matching_segments = [
        seg
        for seg in user_event["segments"]
        if seg.get("kind") == "dialogue" and seg.get("text") == "hello"
    ]
    assert matching_segments

    assistant_event = json.loads(turn_rows[1][1])
    assert assistant_event["chat_id"] == "chat_bot_a"
    assert assistant_event["speaker_id"] == "bot_a"
    assert assistant_event["text"] == "Hi there."
    assert assistant_event["truncated"] is False
def test_get_chat_renders_existing_turns(client, tmp_path):
    """After a turn lands, the chat page shows both sides of the exchange."""
    _seed(tmp_path / "test.db")

    post_resp = client.post("/chats/chat_bot_a/turns", data={"prose": "hello"})
    assert post_resp.status_code == 204

    page = client.get("/chats/chat_bot_a")
    assert page.status_code == 200
    for expected_text in ("hello", "Hi there."):
        assert expected_text in page.text
# ---------------------------------------------------------------------------
# Phase 2 (T44) — multi-entity turn flow.
#
# These tests cover the post_turn flow when a guest is present: addressee
# detection, multi-pair state-update + multi-witness memory writes, and
# the optional interjection follow-on. Each test installs its own
# MockLLMClient with a canned-response queue tailored to the call shape
# of that scenario; the queue is documented at the top of each test so
# the orchestration is auditable.
# ---------------------------------------------------------------------------
def _bot_payload(bot_id: str, name: str, persona: str = "") -> dict:
    """Build a ``bot_authored`` event payload for a test bot.

    Args:
        bot_id: Value stored under ``"id"``.
        name: Value stored under ``"name"``.
        persona: Persona text; when empty, falls back to
            ``"persona for <name>"``.

    Returns:
        A new dict with the remaining fields set to empty placeholders.
    """
    return {
        "id": bot_id,
        "name": name,
        "persona": persona or f"persona for {name}",
        "voice_samples": [],
        "traits": [],
        "backstory": "",
        "initial_relationship_to_you": "",
        "kickoff_prose": "...",
    }
def _seed_chat_with_guest(db_path: Path) -> None:
    """Seed a two-bot chat (host BotA, guest BotB) with an open scene.

    Events are appended in this order: both bots, the ``you`` entity, the
    chat (with ``guest_bot_id`` set), a container, an open scene over
    that container, one edge per directed pair across {you, bot_a,
    bot_b}, and one activity row per entity. The log is then projected.
    """
    bots = (("bot_a", "BotA"), ("bot_b", "BotB"))
    # All six directed pairs, so state-update writes land on initialized
    # rows. The single knowledge fact on bot_a -> you exercises the
    # existing-fact preservation path.
    directed_edges = (
        ("bot_a", "you", ["coworker"]),
        ("you", "bot_a", []),
        ("bot_b", "you", []),
        ("you", "bot_b", []),
        ("bot_a", "bot_b", []),
        ("bot_b", "bot_a", []),
    )
    entity_verbs = (
        ("you", "talking"),
        ("bot_a", "listening"),
        ("bot_b", "listening"),
    )

    with open_db(db_path) as conn:
        for bot_id, bot_name in bots:
            append_event(
                conn, kind="bot_authored", payload=_bot_payload(bot_id, bot_name)
            )
        append_event(
            conn,
            kind="you_authored",
            payload={"name": "Me", "pronouns": "they/them", "persona": ""},
        )
        append_event(
            conn,
            kind="chat_created",
            payload={
                "id": "chat_bot_a",
                "host_bot_id": "bot_a",
                "guest_bot_id": "bot_b",
                "initial_time": "2026-04-26T20:00:00+00:00",
                "narrative_anchor": "Day 1",
                "weather": "",
            },
        )
        # Container + open scene give scene_close detection something to
        # act on in the per-POV summary test.
        append_event(
            conn,
            kind="container_created",
            payload={
                "chat_id": "chat_bot_a",
                "name": "office",
                "type": "workplace",
                "properties": {},
            },
        )
        append_event(
            conn,
            kind="scene_opened",
            payload={
                "chat_id": "chat_bot_a",
                "container_id": 1,
                "started_at": "2026-04-26T20:00:00+00:00",
                "participants": ["you", "bot_a", "bot_b"],
            },
        )
        for source_id, target_id, known_facts in directed_edges:
            append_event(
                conn,
                kind="edge_update",
                payload={
                    "source_id": source_id,
                    "target_id": target_id,
                    "chat_id": "chat_bot_a",
                    "knowledge_facts": known_facts,
                },
            )
        for entity_id, verb in entity_verbs:
            append_event(
                conn,
                kind="activity_change",
                payload={
                    "entity_id": entity_id,
                    "posture": "sitting",
                    "action": {
                        "verb": verb,
                        "interruptible": True,
                        "required_attention": "low",
                        "expected_duration": "ongoing",
                    },
                    "attention": "",
                    "holding": [],
                    "status": {},
                },
            )
        project(conn)
def _override_llm(canned: list[str]) -> MockLLMClient:
    """Install a new ``MockLLMClient`` as the app's LLM dependency.

    The mock receives a copy of ``canned``. It is returned so the caller
    can inspect what is left of the queue once the request has run.
    """
    from chat.web.kickoff import get_llm_client

    llm = MockLLMClient(canned=list(canned))
    app.dependency_overrides[get_llm_client] = lambda: llm
    return llm
def _zero_state() -> str:
    """Return the JSON text of a no-op state-update classifier reply.

    Both deltas are zero and ``knowledge_facts`` is empty, so a canned
    slot filled with this value leaves edge state unchanged.
    """
    return json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )
@pytest.fixture
def app_state_setup(tmp_path, monkeypatch):
    """Yield a ``TestClient`` with temp config/DB env wiring and no mock LLM.

    Mirrors the ``client`` fixture's environment setup, but leaves the
    LLM dependency alone so each multi-entity test can install its own
    canned queue. Any overrides still present are cleared afterwards.
    """
    config_file = tmp_path / "config.toml"
    config_file.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(config_file))
    monkeypatch.setenv("CHAT_DB_PATH", str(tmp_path / "test.db"))

    with TestClient(app) as test_client:
        app.state.background_worker.enabled = False
        yield test_client
    app.dependency_overrides.clear()
def test_single_bot_turn_no_guest_regression(app_state_setup, tmp_path):
    """No-guest regression for the single-bot turn flow.

    The canned queue is parse + narrative + 2 state-updates. With no
    guest in the chat the interjection path is bypassed, so
    ``detect_interjection`` must not consume a slot. The log should end
    with one user_turn, one assistant_turn, two post-turn edge_updates,
    and a single ``memory_written``.
    """
    db_path = tmp_path / "test.db"
    _seed(db_path)

    parse_reply = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "hello"}]}
    )
    mock = _override_llm(
        [parse_reply, "Hi there.", _zero_state(), _zero_state()]
    )
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": "hello"}
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()

    # No guest -> no interjection classifier call -> queue fully drained.
    assert mock._canned == []

    with open_db(db_path) as conn:
        logged_kinds = [
            row[0]
            for row in conn.execute(
                "SELECT kind FROM event_log "
                "WHERE kind IN ('user_turn', 'assistant_turn', 'edge_update', "
                " 'memory_written') ORDER BY id"
            ).fetchall()
        ]

    assert logged_kinds.count("user_turn") == 1
    assert logged_kinds.count("assistant_turn") == 1
    # Seed adds exactly one edge_update (bot_a -> you); the post-turn
    # pass adds two more for a total of three.
    assert logged_kinds.count("edge_update") == 3
    assert logged_kinds.count("memory_written") == 1
def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
    """Chat has a guest; ``detect_interjection`` returns False.

    Verifies 1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates
    + 2 memory_written events, and that the canned queue is fully
    consumed.

    Canned queue (11 calls):
      1. parse_turn
      2. detect_addressee (T74.1) -> host
      3. narrative stream (primary, addressee = host because the prose
         doesn't name the guest)
      4-9. 6 state-update calls (one per directed pair across {you,
         bot_a, bot_b})
      10. detect_interjection -> should_interject=False
      11. detect_scene_close -> should_close=False
    """
    db_path = tmp_path / "test.db"
    _seed_chat_with_guest(db_path)

    canned_parse = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "hello room"}]}
    )
    canned = [
        canned_parse,
        json.dumps(
            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
        ),
        "Greetings.",
        _zero_state(), _zero_state(), _zero_state(),
        _zero_state(), _zero_state(), _zero_state(),
        json.dumps({"should_interject": False, "reason": "calm"}),
        json.dumps({"should_close": False, "reason": "no signal"}),
    ]
    mock = _override_llm(canned)
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": "hello room"}
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()

    # All 11 canned slots should have been consumed.
    assert mock._canned == []

    with open_db(db_path) as conn:
        # Count post-turn edge_updates (i.e. those after the latest
        # assistant_turn id); this excludes the six seeded edges.
        max_at = conn.execute(
            "SELECT MAX(id) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()[0]
        post_turn_edge_updates = conn.execute(
            "SELECT COUNT(*) FROM event_log "
            "WHERE kind = 'edge_update' AND id > ?",
            (max_at,),
        ).fetchone()[0]
        user_turn_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'user_turn'"
        ).fetchone()[0]
        assistant_turn_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()[0]
        memory_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
        ).fetchone()[0]

    assert user_turn_count == 1
    assert assistant_turn_count == 1
    assert post_turn_edge_updates == 6
    assert memory_count == 2
def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
    """Chat has a guest; ``detect_interjection`` returns True.

    Verifies 1 user_turn + 2 assistant_turns + (6 + 6) post-turn
    edge_updates + 4 memory_written events.

    Canned queue (18 calls):
      1. parse_turn
      2. detect_addressee (T74.1) -> host
      3. narrative stream (primary)
      4-9. 6 state-update calls (post-primary)
      10. detect_interjection -> should_interject=True
      11. narrative stream (interjection)
      12-17. 6 state-update calls (post-interjection)
      18. detect_scene_close -> should_close=False
    """
    db_path = tmp_path / "test.db"
    _seed_chat_with_guest(db_path)

    canned_parse = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "tell me"}]}
    )
    canned = [
        canned_parse,
        json.dumps(
            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
        ),
        "Primary beat.",
        _zero_state(), _zero_state(), _zero_state(),
        _zero_state(), _zero_state(), _zero_state(),
        json.dumps({"should_interject": True, "reason": "jealous"}),
        "Interjection beat!",
        _zero_state(), _zero_state(), _zero_state(),
        _zero_state(), _zero_state(), _zero_state(),
        json.dumps({"should_close": False, "reason": "no signal"}),
    ]
    mock = _override_llm(canned)
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": "tell me"}
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()

    assert mock._canned == []

    with open_db(db_path) as conn:
        assistant_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()[0]
        memory_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'memory_written'"
        ).fetchone()[0]
        # All edge_updates after the FIRST assistant_turn are post-turn.
        first_at = conn.execute(
            "SELECT MIN(id) FROM event_log WHERE kind = 'assistant_turn'"
        ).fetchone()[0]
        post_turn_edges = conn.execute(
            "SELECT COUNT(*) FROM event_log "
            "WHERE kind = 'edge_update' AND id > ?",
            (first_at,),
        ).fetchone()[0]
        rows = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' ORDER BY id"
        ).fetchall()

    # Check the count before indexing ``rows`` so a missing interjection
    # turn fails as a readable assertion rather than an IndexError.
    assert assistant_count == 2
    assert memory_count == 4
    assert post_turn_edges == 12

    # Both assistant_turn payloads should reference the same user_turn
    # and the second one tags ``interjection_of`` the first speaker.
    first_payload = json.loads(rows[0][0])
    second_payload = json.loads(rows[1][0])
    assert first_payload["text"] == "Primary beat."
    assert second_payload["text"] == "Interjection beat!"
    # The silent witness is the bot that wasn't the primary addressee.
    assert second_payload["interjection_of"] == first_payload["speaker_id"]
    assert second_payload["speaker_id"] != first_payload["speaker_id"]
    assert first_payload["user_turn_id"] == second_payload["user_turn_id"]
def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
    app_state_setup, tmp_path
):
    """Scene close with a guest present rewrites one POV summary per bot.

    The prose hard-signals a close and the classifier confirms it. A
    ``scene_closed`` event must land, and each bot's scene-1 memory must
    carry its own rewritten ``pov_summary``. Interjection is answered
    False so the queue stays compact.

    Canned queue (13 calls):
      1. parse_turn
      2. detect_addressee (T74.1) -> host
      3. narrative stream (primary)
      4-9. 6 state-update calls
      10. detect_interjection -> False (no follow-on stream)
      11. detect_scene_close -> True
      12. apply_scene_close_summary host POV
      13. apply_scene_close_summary guest POV
    """
    db_path = tmp_path / "test.db"
    closing_prose = "we are done here, fade out"
    _seed_chat_with_guest(db_path)

    parse_reply = json.dumps(
        {"segments": [{"kind": "narration", "text": "we are done here, fade out"}]}
    )
    addressee_reply = json.dumps(
        {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
    )
    host_summary = json.dumps(
        {
            "summary": "BotA noticed the day winding down.",
            "knowledge_facts": [],
            "relationship_summary": "warmer",
        }
    )
    guest_summary = json.dumps(
        {
            "summary": "BotB watched the scene close.",
            "knowledge_facts": [],
            "relationship_summary": "warmer",
        }
    )
    queue = [parse_reply, addressee_reply, "Goodnight."]
    queue += [_zero_state() for _ in range(6)]
    queue += [
        json.dumps({"should_interject": False, "reason": "calm"}),
        json.dumps({"should_close": True, "reason": "fade out signaled"}),
        host_summary,
        guest_summary,
    ]

    mock = _override_llm(queue)
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": closing_prose}
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()

    assert mock._canned == []

    pov_query = "SELECT pov_summary FROM memories WHERE owner_id = ? AND scene_id = 1"
    with open_db(db_path) as conn:
        scene_close_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
        ).fetchone()[0]
        # One memory_pov_summary manual_edit per witness.
        edit_rows = conn.execute(
            "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'"
        ).fetchall()
        # After the rewrite, bot_a's scene-1 memory carries the host POV
        # and bot_b's scene-1 memory carries the guest POV.
        host_pov = conn.execute(pov_query, ("bot_a",)).fetchone()
        guest_pov = conn.execute(pov_query, ("bot_b",)).fetchone()

    pov_edits = []
    for edit_row in edit_rows:
        edit = json.loads(edit_row[0])
        if edit.get("target_kind") == "memory_pov_summary":
            pov_edits.append(edit)

    assert scene_close_count == 1
    # Two memory rewrites — one per witness.
    assert len(pov_edits) == 2
    assert host_pov is not None and "BotA noticed" in host_pov[0]
    assert guest_pov is not None and "BotB watched" in guest_pov[0]
def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
    """T74.1: the addressee classifier decides who takes the primary turn.

    When the classifier names the guest, the primary turn is spoken by
    the guest. The interjection that follows makes the host the silent
    witness, so the second assistant_turn carries the host as speaker.

    Canned queue (with classifier-led addressee = guest):
      1. parse_turn
      2. detect_addressee -> bot_b (the guest)
      3. narrative stream (primary, addressee = guest)
      4-9. 6 state-update calls
      10. detect_interjection -> True
      11. interjection narrative stream
      12-17. 6 state-update calls (post-interjection)
      18. detect_scene_close -> False
    """
    db_path = tmp_path / "test.db"
    user_prose = "BotB, what do you think?"
    _seed_chat_with_guest(db_path)

    parse_reply = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "BotB, what do you think?"}]}
    )
    addressee_reply = json.dumps(
        {
            "addressee_id": "bot_b",
            "confidence": "high",
            "reason": "user named BotB",
        }
    )
    queue = [parse_reply, addressee_reply, "BotB pondering."]
    queue += [_zero_state() for _ in range(6)]
    queue.append(json.dumps({"should_interject": True, "reason": "host wants in"}))
    queue.append("BotA chiming in.")
    queue += [_zero_state() for _ in range(6)]
    queue.append(json.dumps({"should_close": False, "reason": "no signal"}))

    mock = _override_llm(queue)
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns",
            data={"prose": user_prose},
        )
        assert response.status_code == 204
    finally:
        app.dependency_overrides.clear()

    assert mock._canned == []

    with open_db(db_path) as conn:
        turn_rows = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' ORDER BY id"
        ).fetchall()
    primary_turn = json.loads(turn_rows[0][0])
    interjection_turn = json.loads(turn_rows[1][0])

    # Primary speaker is the guest because the addressee classifier
    # picked bot_b for the prose ("BotB, what do you think?").
    assert primary_turn["speaker_id"] == "bot_b"
    # Interjection follow-on goes to the silent witness — the host.
    assert interjection_turn["speaker_id"] == "bot_a"
    assert interjection_turn["interjection_of"] == "bot_b"
def test_cancelled_turn_still_closes_scene_when_user_prose_signals_close(
    app_state_setup, tmp_path
):
    """T74.3 regression: a cancelled primary stream still triggers scene
    close when the user prose carries a hard close signal.

    Rationale (also documented in turns.py near the close-detection
    branch): close detection only consumes the user's prose, which is
    fully appended to the event_log BEFORE streaming starts. The
    cancelled bot beat doesn't invalidate the user's intent to close.

    Implementation: install a mock LLM client whose ``stream`` raises
    CancelledError on the first iteration. The ``generate`` calls (parse,
    addressee, state-updates, scene_close, per-POV summaries) are still
    served from the canned queue. The post_turn route ultimately
    re-raises CancelledError after recording the partial; depending on
    the Starlette/asyncio version TestClient surfaces that either as an
    error response or as a propagated exception, so the request is
    wrapped in a try/except that tolerates both. Either way, the
    scene_closed event must land in the event_log.
    """
    # ``asyncio`` is not imported at module level; without this local
    # import the mock stream would raise NameError instead of the
    # CancelledError this test is meant to simulate.
    import asyncio
    from typing import AsyncIterator, Sequence

    _seed_chat_with_guest(tmp_path / "test.db")
    canned_parse = json.dumps(
        {"segments": [{"kind": "narration", "text": "we are done here, fade out"}]}
    )
    pov_payload = json.dumps(
        {
            "summary": "BotA noticed the day winding down.",
            "knowledge_facts": [],
            "relationship_summary": "warmer",
        }
    )
    pov_payload_guest = json.dumps(
        {
            "summary": "BotB watched the scene close.",
            "knowledge_facts": [],
            "relationship_summary": "warmer",
        }
    )
    # Canned queue: parse + addressee + 6 state-updates +
    # scene_close=True + 2 per-POV summaries. NO interjection slot
    # because the cancel path short-circuits the interjection branch.
    canned = [
        canned_parse,
        json.dumps(
            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
        ),
        # NOTE: no narrative slot — the stream is hijacked below to
        # raise CancelledError on first iteration; it never pulls a
        # canned response.
        _zero_state(), _zero_state(), _zero_state(),
        _zero_state(), _zero_state(), _zero_state(),
        json.dumps({"should_close": True, "reason": "fade out signaled"}),
        pov_payload,
        pov_payload_guest,
    ]

    class _CancelOnStreamMock:
        """Mock LLM client that serves ``generate`` from a canned queue
        and raises CancelledError on the FIRST iteration of ``stream``.

        Mirrors :class:`chat.llm.mock.MockLLMClient` for ``generate`` but
        diverges on ``stream`` to simulate a mid-stream cancel.
        """

        def __init__(self, canned: list[str]) -> None:
            self._canned = list(canned)

        async def generate(
            self, messages: Sequence, *, model: str, **params
        ) -> str:
            return self._canned.pop(0)

        async def stream(
            self, messages: Sequence, *, model: str, **params
        ) -> AsyncIterator[str]:
            # Raise CancelledError on first iteration to simulate the
            # /turns/cancel route firing mid-stream.
            raise asyncio.CancelledError
            yield  # pragma: no cover — keeps this an async generator.

    from chat.web.kickoff import get_llm_client

    mock = _CancelOnStreamMock(canned=list(canned))
    app.dependency_overrides[get_llm_client] = lambda: mock
    try:
        # We don't assert on the response status here; the regression is
        # whether the scene_closed event still landed in the event_log.
        app_state_setup.post(
            "/chats/chat_bot_a/turns",
            data={"prose": "we are done here, fade out"},
        )
    except (KeyboardInterrupt, SystemExit):
        # Never swallow a user interrupt or interpreter exit.
        raise
    except BaseException:
        # Some Starlette/asyncio versions propagate the CancelledError
        # (a BaseException) out of the test client; that's fine — the
        # partial-record + scene-close still ran before the raise.
        pass
    finally:
        app.dependency_overrides.clear()

    with open_db(tmp_path / "test.db") as conn:
        scene_close_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
        ).fetchone()[0]
        assistant_payload = conn.execute(
            "SELECT payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' ORDER BY id"
        ).fetchall()

    # Scene close lands despite the cancel.
    assert scene_close_count == 1
    # The cancelled assistant_turn was still recorded (truncated=True).
    assert len(assistant_payload) == 1
    assert json.loads(assistant_payload[0][0])["truncated"] is True
def test_interjection_enqueues_significance_job(app_state_setup, tmp_path):
    """T74.2: an interjection's memory is enqueued for significance scoring.

    The background worker's ``enqueue`` is swapped for a list-append so
    every enqueued job is captured. A turn that produces both a primary
    and an interjection beat must yield TWO jobs: one per beat. Without
    T74.2 only the primary's enqueue would land.
    """
    _seed_chat_with_guest(tmp_path / "test.db")

    parse_reply = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "tell me"}]}
    )
    addressee_reply = json.dumps(
        {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
    )
    queue = [parse_reply, addressee_reply, "Primary beat."]
    queue += [_zero_state() for _ in range(6)]
    queue.append(json.dumps({"should_interject": True, "reason": "jealous"}))
    queue.append("Interjection beat!")
    queue += [_zero_state() for _ in range(6)]
    queue.append(json.dumps({"should_close": False, "reason": "no signal"}))
    _override_llm(queue)

    captured_jobs: list = []
    worker = app.state.background_worker
    # Re-enable enqueue capture even though the worker's loop is disabled
    # — we want to count enqueues without the loop running classifier work.
    worker.enabled = True
    saved_enqueue = worker.enqueue
    worker.enqueue = captured_jobs.append  # type: ignore[assignment]
    try:
        response = app_state_setup.post(
            "/chats/chat_bot_a/turns", data={"prose": "tell me"}
        )
        assert response.status_code == 204
    finally:
        worker.enqueue = saved_enqueue  # type: ignore[assignment]
        worker.enabled = False
        app.dependency_overrides.clear()

    # Expect 2 enqueues: 1 for the primary memory + 1 for the
    # interjection memory.
    assert len(captured_jobs) == 2
    # The two jobs must point at different memory rows.
    distinct_memory_ids = {job.memory_id for job in captured_jobs}
    assert len(distinct_memory_ids) == 2
    # The two narrative texts should be the two streamed beats.
    beat_texts = [job.narrative_text for job in captured_jobs]
    beat_texts.sort()
    assert beat_texts == ["Interjection beat!", "Primary beat."]