From 4afaf01de7350456ce0f619080508465b1ffd3ea Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 07:03:20 -0400 Subject: [PATCH] test: structured CannedQueue fixture builder for classifier mocks (T116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4.5 carry-over from Phase 3. Tests across test_turn_flow.py, test_meanwhile_turn_flow.py, and the phase3/4 integration suites built positional canned-response arrays for MockLLMClient — adding a new classifier call to a code path required updating the array index in many places. This change ships tests/fixtures.py with a fluent CannedQueue builder that lets tests declare classifier expectations by name and call order instead of by index. Each method appends one item to an internal queue and returns self for chaining; build() emits the flat list[str] queue that MockLLMClient(canned=...) already consumes. The mock's contract is unchanged. Builder methods cover: parse_turn, detect_addressee, state_update (with zero_state alias), detect_interjection, detect_interjection_targeted, detect_scene_close, detect_event_transitions, summarize_scene_pov, detect_threads, meanwhile_digest, score_significance, and a narrative() helper for streaming bot beats. raw() is a passthrough escape hatch. Migration scope: ship the builder + 2 sanity tests + migrate 3 representative tests in test_turn_flow.py as proof of concept (test_single_bot_turn_no_guest_regression, test_turn_with_event_transition_appends_started_event, test_turn_with_no_active_events_skips_classifier). The remaining positional-array tests stay as-is; the builder docstring documents the migration template for Phase 5 work. 
--- tests/fixtures.py | 383 ++++++++++++++++++++++++++++++++++++++++ tests/test_fixtures.py | 140 +++++++++++++++ tests/test_turn_flow.py | 72 ++++---- 3 files changed, 563 insertions(+), 32 deletions(-) create mode 100644 tests/fixtures.py create mode 100644 tests/test_fixtures.py diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 0000000..6ad952b --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,383 @@ +"""Structured test-fixture builder for ``MockLLMClient`` canned queues. + +Phase 4.5 (T116) carry-over from Phase 3. The turn-flow tests in +``test_turn_flow.py``, ``test_meanwhile_turn_flow.py``, +``test_phase3_integration.py``, and ``test_phase4_integration.py`` used +to construct ``MockLLMClient`` canned-response queues as raw positional +lists of pre-encoded JSON strings. That worked, but every time a new +classifier call landed in a code path the tests had to be patched in +many places at the right index — easy to mis-position, hard to read. + +This module ships :class:`CannedQueue`, a fluent builder that lets a +test declare its classifier expectations by **name** and **order** of +call, not by index into a brittle list. Each method appends one item +to the queue and returns ``self`` for chaining; ``build()`` JSON-encodes +the items and produces the flat ``list[str]`` that +``MockLLMClient(canned=...)`` expects. + +Usage +----- + +>>> from tests.fixtures import CannedQueue +>>> from chat.llm.mock import MockLLMClient +>>> canned = ( +... CannedQueue() +... .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}]) +... .narrative("Hi there.") +... .state_update() +... .state_update() +... .build() +... ) +>>> mock = MockLLMClient(canned=canned) + +Each method maps to a single classifier (or stream) call that the turn +flow makes, in the order the production code makes them. Picking the +right method for the slot you need keeps the test readable and lets the +builder pin sensible defaults for the fields tests don't care about. 
class CannedQueue:
    """Fluent builder for ``MockLLMClient`` canned-response queues.

    Every public method appends exactly one slot and returns ``self`` so
    calls can be chained. :meth:`build` returns the flat ``list[str]``
    suitable for ``MockLLMClient(canned=...)``.

    Classifier payloads are JSON-encoded when they are appended rather
    than at ``build()`` time. This snapshots caller-owned lists and
    dicts (mutating a ``segments`` list after the builder call can no
    longer leak into the queue) and makes an unserialisable value fail
    at the builder call that introduced it. Narrative slots are stored
    verbatim as plain strings.

    All classifier helpers accept extra keyword arguments (``**rest``).
    They are merged into the payload last, so they can add fields the
    builder does not model or override one of its defaults.
    """

    def __init__(self) -> None:
        # Normally holds only ``str``; ``Any`` is kept because a caller
        # may still hand a non-string to ``narrative()``/``raw()``.
        self._queue: list[Any] = []

    def _push_json(
        self, payload: dict[str, Any], overrides: dict[str, Any]
    ) -> "CannedQueue":
        """Merge ``overrides`` into ``payload`` and append it as JSON."""
        payload.update(overrides)
        # Encode now so later mutation of caller-owned containers
        # cannot change what the mock will eventually return.
        self._queue.append(json.dumps(payload))
        return self

    # ------------------------------------------------------------------
    # Narrative stream — bare string, no JSON wrapping.
    # ------------------------------------------------------------------

    def narrative(self, text: str) -> "CannedQueue":
        """Append one streaming narrative response as a bare string.

        Use one ``narrative()`` per assistant beat (primary, and
        optionally an interjection / second beat).
        """
        self._queue.append(text)
        return self

    def raw(self, value: str) -> "CannedQueue":
        """Append a raw string (escape hatch for non-classifier calls).

        Most tests should reach for the named helpers — this is here
        for one-offs the builder doesn't model yet.
        """
        self._queue.append(value)
        return self

    # ------------------------------------------------------------------
    # Turn parser — splits user prose into segments.
    # ------------------------------------------------------------------

    def parse_turn(
        self,
        *,
        segments: list[dict] | None = None,
        intent: str = "narrative",
        landing_state_hint: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``parse_turn`` classifier response.

        ``segments`` defaults to an empty list. ``intent`` defaults to
        ``"narrative"``; pass ``"skip_elision"`` or ``"skip_jump"`` to
        exercise the natural-language skip paths. ``landing_state_hint``
        carries the residual descriptor for elision skips.
        """
        return self._push_json(
            {
                "segments": [] if segments is None else segments,
                "intent": intent,
                "landing_state_hint": landing_state_hint,
            },
            rest,
        )

    # ------------------------------------------------------------------
    # Multi-entity addressee classifier (T74.1).
    # ------------------------------------------------------------------

    def detect_addressee(
        self,
        *,
        addressee_id: str,
        confidence: str = "medium",
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_addressee`` classifier response."""
        return self._push_json(
            {
                "addressee_id": addressee_id,
                "confidence": confidence,
                "reason": reason,
            },
            rest,
        )

    # ------------------------------------------------------------------
    # State-update — one per directed edge per turn.
    # ------------------------------------------------------------------

    def state_update(
        self,
        *,
        affinity_delta: int = 0,
        trust_delta: int = 0,
        knowledge_facts: list | None = None,
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``apply_state_update`` classifier response.

        Defaults to a benign zero-delta payload with no facts, so tests
        that don't care about state mutations can call this without
        arguments. One call is required per directed edge that fires
        after the assistant beat (e.g. single-bot non-guest turn = 2
        calls; multi-bot guest turn = 6 calls).
        """
        return self._push_json(
            {
                "affinity_delta": affinity_delta,
                "trust_delta": trust_delta,
                "knowledge_facts": (
                    [] if knowledge_facts is None else knowledge_facts
                ),
            },
            rest,
        )

    def zero_state(self) -> "CannedQueue":
        """Alias for ``state_update()`` with all defaults — matches the
        ``_zero_state()`` helper in existing tests.
        """
        return self.state_update()

    # ------------------------------------------------------------------
    # Interjection (T74.2) — silent witness chimes in.
    # ------------------------------------------------------------------

    def detect_interjection(
        self,
        *,
        should_interject: bool,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_interjection`` classifier response."""
        return self._push_json(
            {"should_interject": should_interject, "reason": reason},
            rest,
        )

    def detect_interjection_targeted(
        self,
        *,
        targeted: bool,
        target_id: str | None = None,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one targeted-interjection classifier response."""
        return self._push_json(
            {"targeted": targeted, "target_id": target_id, "reason": reason},
            rest,
        )

    # ------------------------------------------------------------------
    # Scene-close detector (T26).
    # ------------------------------------------------------------------

    def detect_scene_close(
        self,
        *,
        should_close: bool,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_scene_close`` classifier response."""
        return self._push_json(
            {"should_close": should_close, "reason": reason},
            rest,
        )

    # ------------------------------------------------------------------
    # Event lifecycle (T52, T61) — per-turn transitions.
    # ------------------------------------------------------------------

    def detect_event_transitions(
        self,
        transitions: list[dict] | None = None,
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_event_transitions`` classifier response.

        ``transitions`` is a list of ``{"event_id": ..., "new_status":
        "active"|"completed"|"cancelled", "reason": ...}`` dicts.
        Omitting the argument, passing ``None`` and passing ``[]`` are
        equivalent: all three model a call that ran but produced no
        transitions.

        Note: when no events are seeded, ``detect_event_transitions``
        short-circuits without an LLM call — in that case do NOT append
        this slot.
        """
        return self._push_json(
            {"transitions": [] if transitions is None else transitions},
            rest,
        )

    # ------------------------------------------------------------------
    # Per-POV scene summary (used after scene close).
    # ------------------------------------------------------------------

    def summarize_scene_pov(
        self,
        *,
        summary: str,
        knowledge_facts: list | None = None,
        relationship_summary: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one per-POV scene-summary response.

        Used by ``apply_scene_close_summary`` — one call per witness
        once a scene closes.
        """
        return self._push_json(
            {
                "summary": summary,
                "knowledge_facts": (
                    [] if knowledge_facts is None else knowledge_facts
                ),
                "relationship_summary": relationship_summary,
            },
            rest,
        )

    # ------------------------------------------------------------------
    # Thread detection (Phase 3 §3.3).
    # ------------------------------------------------------------------

    def detect_threads(
        self,
        candidates: list[dict] | None = None,
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_threads`` classifier response.

        ``candidates`` is a list of ``{"action": "open"|"update",
        "title": ..., "summary": ..., "existing_thread_id": ...}`` dicts;
        ``None`` (the default) is emitted as an empty list.
        """
        return self._push_json(
            {"candidates": [] if candidates is None else candidates},
            rest,
        )

    # ------------------------------------------------------------------
    # Meanwhile digest — narrative summary of what happened off-screen.
    # ------------------------------------------------------------------

    def meanwhile_digest(self, summary: str) -> "CannedQueue":
        """Append one meanwhile-digest narrative response.

        The digest is plain text (not JSON), so this simply delegates
        to :meth:`narrative`; the separate name is for readability at
        the call site.
        """
        return self.narrative(summary)

    # ------------------------------------------------------------------
    # Significance scorer (background worker; rarely hit in unit tests
    # but available for completeness).
    # ------------------------------------------------------------------

    def score_significance(
        self,
        *,
        score: float = 0.0,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one significance-scoring classifier response."""
        return self._push_json({"score": score, "reason": reason}, rest)

    # ------------------------------------------------------------------
    # Build / introspection.
    # ------------------------------------------------------------------

    def build(self) -> list[str]:
        """Return the flat ``list[str]`` queue for ``MockLLMClient``.

        A new list is returned on every call, so the caller may mutate
        it freely. String slots are passed through verbatim. Anything
        that is not a string (only possible when a non-string was given
        to ``narrative()``/``raw()``) is JSON-encoded here.
        """
        return [
            slot if isinstance(slot, str) else json.dumps(slot)
            for slot in self._queue
        ]

    def __len__(self) -> int:
        """Return the number of slots appended so far."""
        return len(self._queue)
def test_canned_queue_build_emits_expected_shapes():
    """Pin the JSON shape each builder method emits.

    Classifier slots must decode to the expected dicts; the narrative
    slot must stay a bare (non-JSON) string.
    """
    builder = CannedQueue()
    builder.parse_turn(segments=[{"kind": "dialogue", "text": "hello"}])
    builder.detect_addressee(addressee_id="bot_a", reason="host")
    builder.narrative("Hi there.")
    builder.state_update()
    builder.state_update(affinity_delta=1, trust_delta=2)
    builder.detect_interjection(should_interject=False, reason="calm")
    builder.detect_event_transitions(
        [{"event_id": "evt_1", "new_status": "active", "reason": "they arrived"}]
    )
    builder.detect_scene_close(should_close=False, reason="no signal")
    builder.summarize_scene_pov(summary="BotA noticed the day winding down.")
    builder.detect_threads(
        [
            {
                "action": "open",
                "title": "Maya's job hunt",
                "summary": "Maya is looking for a new job",
                "existing_thread_id": None,
            }
        ]
    )
    slots = builder.build()

    # Every slot is a string, and there is one slot per builder call.
    assert all(isinstance(entry, str) for entry in slots)
    assert len(slots) == 10

    # Slot 0: parse_turn, with the default intent and empty hint.
    parsed = json.loads(slots[0])
    assert parsed["segments"] == [{"kind": "dialogue", "text": "hello"}]
    assert parsed["intent"] == "narrative"
    assert parsed["landing_state_hint"] == ""

    # Slot 1: detect_addressee, with the default confidence.
    addressee = json.loads(slots[1])
    assert addressee["addressee_id"] == "bot_a"
    assert addressee["confidence"] == "medium"
    assert addressee["reason"] == "host"

    # Slot 2: narrative, passed through verbatim and not valid JSON.
    assert slots[2] == "Hi there."
    with pytest.raises(json.JSONDecodeError):
        json.loads(slots[2])

    # Slot 3: state_update with all defaults.
    default_state = json.loads(slots[3])
    assert default_state == {
        "affinity_delta": 0,
        "trust_delta": 0,
        "knowledge_facts": [],
    }

    # Slot 4: state_update with explicit deltas.
    custom_state = json.loads(slots[4])
    assert custom_state["affinity_delta"] == 1
    assert custom_state["trust_delta"] == 2
    assert custom_state["knowledge_facts"] == []

    # Slot 5: detect_interjection.
    interjection = json.loads(slots[5])
    assert interjection == {"should_interject": False, "reason": "calm"}

    # Slot 6: detect_event_transitions.
    first_transition = json.loads(slots[6])["transitions"][0]
    assert first_transition["event_id"] == "evt_1"
    assert first_transition["new_status"] == "active"

    # Slot 7: detect_scene_close.
    scene_close = json.loads(slots[7])
    assert scene_close == {"should_close": False, "reason": "no signal"}

    # Slot 8: summarize_scene_pov, with default facts and relationship text.
    pov_summary = json.loads(slots[8])
    assert pov_summary["summary"] == "BotA noticed the day winding down."
    assert pov_summary["knowledge_facts"] == []
    assert pov_summary["relationship_summary"] == ""

    # Slot 9: detect_threads.
    first_candidate = json.loads(slots[9])["candidates"][0]
    assert first_candidate["action"] == "open"
    assert first_candidate["title"] == "Maya's job hunt"


@pytest.mark.asyncio
async def test_canned_queue_round_trips_through_mock_llm_client():
    """A built queue comes back from ``MockLLMClient.generate`` in order.

    This is the contract every migrated test relies on: three slots in,
    the same three slots out, then the mock is exhausted.
    """
    builder = CannedQueue()
    builder.parse_turn(segments=[{"kind": "dialogue", "text": "hi"}])
    builder.narrative("Hello back.")
    builder.state_update()
    mock = MockLLMClient(canned=builder.build())

    # First pop: the parse_turn payload.
    first = await mock.generate([], model="x")
    assert json.loads(first)["segments"] == [{"kind": "dialogue", "text": "hi"}]

    # Second pop: the narrative string, returned as-is.
    second = await mock.generate([], model="x")
    assert second == "Hello back."

    # Third pop: the zero-delta state update.
    third = await mock.generate([], model="x")
    assert json.loads(third) == {
        "affinity_delta": 0,
        "trust_delta": 0,
        "knowledge_facts": [],
    }

    # A fourth pop must fail: the queue is fully drained.
    with pytest.raises(IndexError):
        await mock.generate([], model="x")
""" _seed(tmp_path / "test.db") - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "hello"}]} - ) - mock = _override_llm( - [canned_parse, "Hi there.", _zero_state(), _zero_state()] + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}]) + .narrative("Hi there.") + .state_update() + .state_update() + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "hello"} @@ -979,29 +986,25 @@ def test_turn_with_event_transition_appends_started_event( }, ) - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "they arrived"}]} - ) - canned_event_decision = json.dumps( - { - "transitions": [ - { - "event_id": "evt_1", - "new_status": "active", - "reason": "they arrived", - } - ] - } - ) - mock = _override_llm( - [ - canned_parse, - "They walk in.", - _zero_state(), - _zero_state(), - canned_event_decision, - ] + # T116: migrated to :class:`tests.fixtures.CannedQueue`. + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "they arrived"}]) + .narrative("They walk in.") + .state_update() + .state_update() + .detect_event_transitions( + [ + { + "event_id": "evt_1", + "new_status": "active", + "reason": "they arrived", + } + ] + ) + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "they arrived"} @@ -1155,18 +1158,23 @@ def test_turn_with_no_active_events_skips_classifier(app_state_setup, tmp_path): short-circuits without an LLM call (per T52). The canned queue must therefore have ZERO event-detection slots — same shape as the Phase 2 no-guest baseline. + + T116: migrated to :class:`tests.fixtures.CannedQueue`. """ _seed(tmp_path / "test.db") - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "hello"}]} - ) # Only 4 slots: parse + narrative + 2 state-updates. 
NO extra slot for # event-detection — non-existent active_events causes the helper to # short-circuit before pulling from the queue. - mock = _override_llm( - [canned_parse, "Hi there.", _zero_state(), _zero_state()] + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}]) + .narrative("Hi there.") + .state_update() + .state_update() + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "hello"}