"""Structured test-fixture builder for ``MockLLMClient`` canned queues.

Phase 4.5 (T116) carry-over from Phase 3. The turn-flow tests in
``test_turn_flow.py``, ``test_meanwhile_turn_flow.py``,
``test_phase3_integration.py``, and ``test_phase4_integration.py`` used
to construct ``MockLLMClient`` canned-response queues as raw positional
lists of pre-encoded JSON strings. That worked, but every time a new
classifier call landed in a code path the tests had to be patched in
many places at the right index — easy to mis-position, hard to read.

This module ships :class:`CannedQueue`, a fluent builder that lets a
test declare its classifier expectations by **name** and **order** of
call, not by index into a brittle list. Each method appends one item
to the queue and returns ``self`` for chaining; ``build()`` JSON-encodes
the dict items (string items pass through verbatim) and produces the
flat ``list[str]`` that ``MockLLMClient(canned=...)`` expects.

Usage
-----

>>> from tests.fixtures import CannedQueue
>>> from chat.llm.mock import MockLLMClient
>>> canned = (
...     CannedQueue()
...     .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}])
...     .narrative("Hi there.")
...     .state_update()
...     .state_update()
...     .build()
... )
>>> mock = MockLLMClient(canned=canned)

Each method maps to a single classifier (or stream) call that the turn
flow makes, in the order the production code makes them. Picking the
right method for the slot you need keeps the test readable and lets the
builder pin sensible defaults for the fields tests don't care about.

Migration template
------------------

To migrate a positional canned-array test:

1. Identify each slot in the existing array and what classifier it
   feeds. Comments above the array often spell this out — start there.
2. Replace each slot with the matching :class:`CannedQueue` method:

   - ``json.dumps({"segments": [...]})`` → ``.parse_turn(segments=...)``
   - bare narrative string → ``.narrative("...")``
   - zero-state JSON → ``.state_update()`` (defaults are zeros)
   - ``json.dumps({"addressee_id": ...})`` → ``.detect_addressee(...)``
   - ``json.dumps({"should_interject": ...})`` → ``.detect_interjection(...)``
   - ``json.dumps({"should_close": ...})`` → ``.detect_scene_close(...)``
   - ``json.dumps({"transitions": [...]})`` → ``.detect_event_transitions(...)``
   - ``json.dumps({"candidates": [...]})`` → ``.detect_threads(...)``
   - per-POV summary JSON → ``.summarize_scene_pov(summary=...)``
3. End with ``.build()`` and pass that to
   ``MockLLMClient(canned=...)``. The mock's contract is unchanged.

Notes on streams
----------------

``MockLLMClient.stream`` and ``MockLLMClient.generate`` share one queue
— each pop is one entry, regardless of whether the production code
streams the response or generates it whole. The narrative service
streams; classifier services generate. The builder treats both the same:
``narrative()`` appends a raw string, the classifier methods append
JSON-encoded dicts. Both end up in the same flat ``list[str]`` that the
mock pops from in order.

The remaining tests in the suite (about 30 across the four files
mentioned above) still use positional arrays — Phase 5 work to migrate
the rest. New tests should prefer this builder.
"""

from __future__ import annotations

import json
from typing import Any


class CannedQueue:
    """Fluent builder for ``MockLLMClient`` canned-response queues.

    Each public method appends exactly one item to an internal queue and
    returns ``self`` for chaining. ``build()`` returns the flat
    ``list[str]`` suitable for ``MockLLMClient(canned=...)``.

    The queue holds either ``dict`` items (JSON-encoded at ``build()``
    time) or ``str`` items (passed through verbatim — used for narrative
    streams). Because encoding is deferred, a payload value that
    ``json.dumps`` cannot serialize raises ``TypeError`` from
    ``build()``, not from the helper that queued it.

    Every JSON helper accepts ``**rest``: extra keyword arguments are
    merged into the payload after the named fields, so a test can add
    fields the builder does not model yet.
    """

    def __init__(self) -> None:
        self._queue: list[dict[str, Any] | str] = []

    # ------------------------------------------------------------------
    # Internal helpers — the single place where items enter the queue.
    # ------------------------------------------------------------------

    def _append(self, item: dict[str, Any] | str) -> CannedQueue:
        """Queue ``item`` as-is and return ``self`` for chaining."""
        self._queue.append(item)
        return self

    def _append_payload(
        self, payload: dict[str, Any], rest: dict[str, Any]
    ) -> CannedQueue:
        """Merge ``rest`` into ``payload`` and queue the resulting dict.

        ``rest`` is taken as a plain dict (not ``**rest``) so that extra
        keys can never collide with this helper's own parameter names.
        """
        payload.update(rest)
        return self._append(payload)

    # ------------------------------------------------------------------
    # Narrative stream — bare string, no JSON wrapping.
    # ------------------------------------------------------------------

    def narrative(self, text: str) -> CannedQueue:
        """Append one streaming narrative response.

        ``MockLLMClient.stream`` pops the next entry from the same queue
        as ``generate`` — a bare string is what the streaming bot beat
        consumes. Use one ``narrative()`` per assistant beat (primary,
        and optionally an interjection / second beat).
        """
        return self._append(text)

    def raw(self, value: str) -> CannedQueue:
        """Append a raw string (escape hatch for non-classifier calls).

        Most tests should reach for the named helpers — this is here
        for one-offs the builder doesn't model yet.
        """
        return self._append(value)

    # ------------------------------------------------------------------
    # Turn parser — splits user prose into segments.
    # ------------------------------------------------------------------

    def parse_turn(
        self,
        *,
        segments: list[dict] | None = None,
        intent: str = "narrative",
        landing_state_hint: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``parse_turn`` classifier response.

        ``segments`` defaults to an empty list. ``intent`` defaults to
        ``"narrative"``; pass ``"skip_elision"`` or ``"skip_jump"`` to
        exercise the natural-language skip paths.
        ``landing_state_hint`` carries the residual descriptor for
        elision skips and is otherwise ignored.
        """
        payload: dict[str, Any] = {
            "segments": segments if segments is not None else [],
            "intent": intent,
            "landing_state_hint": landing_state_hint,
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Multi-entity addressee classifier (T74.1).
    # ------------------------------------------------------------------

    def detect_addressee(
        self,
        *,
        addressee_id: str,
        confidence: str = "medium",
        reason: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``detect_addressee`` classifier response."""
        payload: dict[str, Any] = {
            "addressee_id": addressee_id,
            "confidence": confidence,
            "reason": reason,
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # State-update — one per directed edge per turn.
    # ------------------------------------------------------------------

    def state_update(
        self,
        *,
        affinity_delta: int = 0,
        trust_delta: int = 0,
        knowledge_facts: list | None = None,
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``apply_state_update`` classifier response.

        Defaults to a benign zero-delta payload — tests that don't care
        about state mutations can call this without arguments. One call
        is required per directed edge that fires after the assistant
        beat (e.g. single-bot non-guest turn = 2 calls; multi-bot guest
        turn = 6 calls).
        """
        payload: dict[str, Any] = {
            "affinity_delta": affinity_delta,
            "trust_delta": trust_delta,
            "knowledge_facts": (
                knowledge_facts if knowledge_facts is not None else []
            ),
        }
        return self._append_payload(payload, rest)

    def zero_state(self) -> CannedQueue:
        """Alias for ``state_update()`` with all defaults — matches the
        ``_zero_state()`` helper in existing tests.
        """
        return self.state_update()

    # ------------------------------------------------------------------
    # Interjection (T74.2) — silent witness chimes in.
    # ------------------------------------------------------------------

    def detect_interjection(
        self,
        *,
        should_interject: bool,
        reason: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``detect_interjection`` classifier response."""
        payload: dict[str, Any] = {
            "should_interject": should_interject,
            "reason": reason,
        }
        return self._append_payload(payload, rest)

    def detect_interjection_targeted(
        self,
        *,
        targeted: bool,
        target_id: str | None = None,
        reason: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one targeted-interjection classifier response."""
        payload: dict[str, Any] = {
            "targeted": targeted,
            "target_id": target_id,
            "reason": reason,
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Scene-close detector (T26).
    # ------------------------------------------------------------------

    def detect_scene_close(
        self,
        *,
        should_close: bool,
        reason: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``detect_scene_close`` classifier response."""
        payload: dict[str, Any] = {
            "should_close": should_close,
            "reason": reason,
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Event lifecycle (T52, T61) — per-turn transitions.
    # ------------------------------------------------------------------

    def detect_event_transitions(
        self,
        transitions: list[dict] | None = None,
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``detect_event_transitions`` classifier response.

        ``transitions`` is a list of ``{"event_id": ..., "new_status":
        "active"|"completed"|"cancelled", "reason": ...}`` dicts. Omit
        the argument (or pass ``None`` or an empty list — all three are
        equivalent) to queue a response for a call that ran but
        produced no transitions.

        Note: when no events are seeded, ``detect_event_transitions``
        short-circuits without an LLM call — in that case do NOT append
        this slot.
        """
        payload: dict[str, Any] = {
            "transitions": transitions if transitions is not None else [],
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Per-POV scene summary (used after scene close).
    # ------------------------------------------------------------------

    def summarize_scene_pov(
        self,
        *,
        summary: str,
        knowledge_facts: list | None = None,
        relationship_summary: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one per-POV scene-summary response.

        Used by ``apply_scene_close_summary`` — one call per witness
        once a scene closes.
        """
        payload: dict[str, Any] = {
            "summary": summary,
            "knowledge_facts": (
                knowledge_facts if knowledge_facts is not None else []
            ),
            "relationship_summary": relationship_summary,
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Thread detection (Phase 3 §3.3).
    # ------------------------------------------------------------------

    def detect_threads(
        self,
        candidates: list[dict] | None = None,
        **rest: Any,
    ) -> CannedQueue:
        """Append one ``detect_threads`` classifier response.

        ``candidates`` is a list of ``{"action": "open"|"update",
        "title": ..., "summary": ..., "existing_thread_id": ...}`` dicts.
        Omitting it (or passing ``None``) queues an empty candidate list.
        """
        payload: dict[str, Any] = {
            "candidates": candidates if candidates is not None else [],
        }
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Meanwhile digest — narrative summary of what happened off-screen.
    # ------------------------------------------------------------------

    def meanwhile_digest(self, summary: str) -> CannedQueue:
        """Append one meanwhile-digest narrative response.

        The digest service streams the digest as plain text (not JSON)
        so this is a thin wrapper over ``narrative`` for readability at
        the call site.
        """
        return self.narrative(summary)

    # ------------------------------------------------------------------
    # Significance scorer (background worker; rarely hit in unit tests
    # but available for completeness).
    # ------------------------------------------------------------------

    def score_significance(
        self,
        *,
        score: float = 0.0,
        reason: str = "",
        **rest: Any,
    ) -> CannedQueue:
        """Append one significance-scoring classifier response."""
        payload: dict[str, Any] = {"score": score, "reason": reason}
        return self._append_payload(payload, rest)

    # ------------------------------------------------------------------
    # Build / introspection.
    # ------------------------------------------------------------------

    def build(self) -> list[str]:
        """Return the flat ``list[str]`` queue for ``MockLLMClient``.

        Dict items are JSON-encoded; string items are passed through
        verbatim (so streaming responses retain their raw form). The
        builder itself is left untouched, so ``build()`` may be called
        more than once.
        """
        return [
            item if isinstance(item, str) else json.dumps(item)
            for item in self._queue
        ]

    def __len__(self) -> int:
        """Return the number of queued slots (one per builder call)."""
        return len(self._queue)
"""Sanity tests for :mod:`tests.fixtures` — the structured CannedQueue
builder for ``MockLLMClient`` (T116).

The builder is a thin shaping layer over JSON dicts; these tests pin
the JSON shapes and the ``MockLLMClient`` round-trip so nothing
silently regresses if a default field name or shape gets renamed.
"""

from __future__ import annotations

import json

import pytest

from chat.llm.mock import MockLLMClient
from tests.fixtures import CannedQueue


def test_canned_queue_build_emits_expected_shapes():
    """Pin the JSON shape each exercised builder method emits.

    Ten slots are queued in a fixed order and then decoded one by one;
    the narrative slot must stay a bare (non-JSON) string.
    """
    arrival = {
        "event_id": "evt_1",
        "new_status": "active",
        "reason": "they arrived",
    }
    job_hunt = {
        "action": "open",
        "title": "Maya's job hunt",
        "summary": "Maya is looking for a new job",
        "existing_thread_id": None,
    }

    builder = CannedQueue()
    builder.parse_turn(segments=[{"kind": "dialogue", "text": "hello"}])
    builder.detect_addressee(addressee_id="bot_a", reason="host")
    builder.narrative("Hi there.")
    builder.state_update()
    builder.state_update(affinity_delta=1, trust_delta=2)
    builder.detect_interjection(should_interject=False, reason="calm")
    builder.detect_event_transitions([arrival])
    builder.detect_scene_close(should_close=False, reason="no signal")
    builder.summarize_scene_pov(summary="BotA noticed the day winding down.")
    builder.detect_threads([job_hunt])
    slots = builder.build()

    # Every slot is a string — that is what the mock is fed.
    assert all(isinstance(slot, str) for slot in slots)
    assert len(slots) == 10

    # parse_turn: ``intent`` falls back to "narrative".
    parsed = json.loads(slots[0])
    assert parsed["segments"] == [{"kind": "dialogue", "text": "hello"}]
    assert parsed["intent"] == "narrative"
    assert parsed["landing_state_hint"] == ""

    # detect_addressee: ``confidence`` falls back to "medium".
    addressee = json.loads(slots[1])
    assert addressee["addressee_id"] == "bot_a"
    assert addressee["confidence"] == "medium"
    assert addressee["reason"] == "host"

    # narrative: stored verbatim, so it must not parse as JSON.
    assert slots[2] == "Hi there."
    with pytest.raises(json.JSONDecodeError):
        json.loads(slots[2])

    # state_update, all defaults: zero deltas and no facts.
    default_update = json.loads(slots[3])
    assert default_update == {
        "affinity_delta": 0,
        "trust_delta": 0,
        "knowledge_facts": [],
    }

    # state_update, explicit deltas.
    custom_update = json.loads(slots[4])
    assert custom_update["affinity_delta"] == 1
    assert custom_update["trust_delta"] == 2
    assert custom_update["knowledge_facts"] == []

    # detect_interjection.
    interjection = json.loads(slots[5])
    assert interjection == {"should_interject": False, "reason": "calm"}

    # detect_event_transitions.
    first_transition = json.loads(slots[6])["transitions"][0]
    assert first_transition["event_id"] == "evt_1"
    assert first_transition["new_status"] == "active"

    # detect_scene_close.
    scene_close = json.loads(slots[7])
    assert scene_close == {"should_close": False, "reason": "no signal"}

    # summarize_scene_pov.
    pov_summary = json.loads(slots[8])
    assert pov_summary["summary"] == "BotA noticed the day winding down."
    assert pov_summary["knowledge_facts"] == []
    assert pov_summary["relationship_summary"] == ""

    # detect_threads.
    first_candidate = json.loads(slots[9])["candidates"][0]
    assert first_candidate["action"] == "open"
    assert first_candidate["title"] == "Maya's job hunt"


@pytest.mark.asyncio
async def test_canned_queue_round_trips_through_mock_llm_client():
    """Feed a built queue to ``MockLLMClient`` and read it back in order.

    Three slots go in; three ``generate`` calls are expected to hand
    them back unchanged, and a fourth call is expected to raise
    ``IndexError`` because nothing is left.
    """
    builder = CannedQueue()
    builder.parse_turn(segments=[{"kind": "dialogue", "text": "hi"}])
    builder.narrative("Hello back.")
    builder.state_update()
    client = MockLLMClient(canned=builder.build())

    # First pop: the parse_turn payload.
    first = await client.generate([], model="x")
    assert json.loads(first)["segments"] == [
        {"kind": "dialogue", "text": "hi"}
    ]

    # Second pop: the narrative, still a raw string.
    second = await client.generate([], model="x")
    assert second == "Hello back."

    # Third pop: the zero-delta state update.
    third = await client.generate([], model="x")
    assert json.loads(third) == {
        "affinity_delta": 0,
        "trust_delta": 0,
        "knowledge_facts": [],
    }

    # Nothing left to pop.
    with pytest.raises(IndexError):
        await client.generate([], model="x")
""" _seed(tmp_path / "test.db") - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "hello"}]} - ) - mock = _override_llm( - [canned_parse, "Hi there.", _zero_state(), _zero_state()] + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}]) + .narrative("Hi there.") + .state_update() + .state_update() + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "hello"} @@ -979,29 +986,25 @@ def test_turn_with_event_transition_appends_started_event( }, ) - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "they arrived"}]} - ) - canned_event_decision = json.dumps( - { - "transitions": [ - { - "event_id": "evt_1", - "new_status": "active", - "reason": "they arrived", - } - ] - } - ) - mock = _override_llm( - [ - canned_parse, - "They walk in.", - _zero_state(), - _zero_state(), - canned_event_decision, - ] + # T116: migrated to :class:`tests.fixtures.CannedQueue`. + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "they arrived"}]) + .narrative("They walk in.") + .state_update() + .state_update() + .detect_event_transitions( + [ + { + "event_id": "evt_1", + "new_status": "active", + "reason": "they arrived", + } + ] + ) + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "they arrived"} @@ -1155,18 +1158,23 @@ def test_turn_with_no_active_events_skips_classifier(app_state_setup, tmp_path): short-circuits without an LLM call (per T52). The canned queue must therefore have ZERO event-detection slots — same shape as the Phase 2 no-guest baseline. + + T116: migrated to :class:`tests.fixtures.CannedQueue`. """ _seed(tmp_path / "test.db") - canned_parse = json.dumps( - {"segments": [{"kind": "dialogue", "text": "hello"}]} - ) # Only 4 slots: parse + narrative + 2 state-updates. 
NO extra slot for # event-detection — non-existent active_events causes the helper to # short-circuit before pulling from the queue. - mock = _override_llm( - [canned_parse, "Hi there.", _zero_state(), _zero_state()] + canned = ( + CannedQueue() + .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}]) + .narrative("Hi there.") + .state_update() + .state_update() + .build() ) + mock = _override_llm(canned) try: response = app_state_setup.post( "/chats/chat_bot_a/turns", data={"prose": "hello"}