# chat/tests/fixtures.py

"""Structured test-fixture builder for ``MockLLMClient`` canned queues.

Phase 4.5 (T116) carry-over from Phase 3. The turn-flow tests in
``test_turn_flow.py``, ``test_meanwhile_turn_flow.py``,
``test_phase3_integration.py``, and ``test_phase4_integration.py`` used
to construct ``MockLLMClient`` canned-response queues as raw positional
lists of pre-encoded JSON strings. That worked, but every time a new
classifier call landed in a code path the tests had to be patched in
many places at the right index — easy to mis-position, hard to read.

This module ships :class:`CannedQueue`, a fluent builder that lets a
test declare its classifier expectations by **name** and **order** of
call, not by index into a brittle list. Each method appends one item
to the queue and returns ``self`` for chaining; ``build()`` JSON-encodes
the items and produces the flat ``list[str]`` that
``MockLLMClient(canned=...)`` expects.

Usage
-----

>>> from tests.fixtures import CannedQueue
>>> from chat.llm.mock import MockLLMClient
>>> canned = (
...     CannedQueue()
...         .parse_turn(segments=[{"kind": "dialogue", "text": "hello"}])
...         .narrative("Hi there.")
...         .state_update()
...         .state_update()
...         .build()
... )
>>> mock = MockLLMClient(canned=canned)

Each method maps to a single classifier (or stream) call that the turn
flow makes, in the order the production code makes them. Picking the
right method for the slot you need keeps the test readable and lets the
builder pin sensible defaults for the fields tests don't care about.

Migration template
------------------

To migrate a positional canned-array test:

1. Identify each slot in the existing array and what classifier it
   feeds. Comments above the array often spell this out — start there.
2. Replace each slot with the matching :class:`CannedQueue` method:

   - ``json.dumps({"segments": [...]})`` → ``.parse_turn(segments=...)``
   - bare narrative string → ``.narrative("...")``
   - zero-state JSON → ``.state_update()`` or ``.zero_state()``
     (defaults are zeros)
   - ``json.dumps({"addressee_id": ...})`` → ``.detect_addressee(...)``
   - ``json.dumps({"should_interject": ...})`` → ``.detect_interjection(...)``
   - ``json.dumps({"should_close": ...})`` → ``.detect_scene_close(...)``
   - ``json.dumps({"transitions": [...]})`` → ``.detect_event_transitions(...)``
   - per-POV summary JSON → ``.summarize_scene_pov(summary=...)``
3. End with ``.build()`` and pass that to
   ``MockLLMClient(canned=...)``. The mock's contract is unchanged.

Notes on streams
----------------

``MockLLMClient.stream`` and ``MockLLMClient.generate`` share one queue
— each pop is one entry, regardless of whether the production code
streams the response or generates it whole. The narrative service
streams; classifier services generate. The builder treats both the same:
``narrative()`` appends a raw string, the classifier methods append
JSON-encoded dicts. Both end up in the same flat ``list[str]`` that the
mock pops from in order.

About 30 tests across the four files mentioned above still use
positional arrays; migrating them is Phase 5 work. New tests should
prefer this builder.
"""

from __future__ import annotations

import json
from typing import Any


class CannedQueue:
    """Fluent builder for ``MockLLMClient`` canned-response queues.

    Each method appends one item to an internal queue and returns
    ``self`` for chaining. ``build()`` returns the flat ``list[str]``
    suitable for ``MockLLMClient(canned=...)``.

    The queue holds either ``dict`` (JSON-encoded at ``build()`` time)
    or ``str`` (passed through verbatim — used for narrative streams).
    """

    def __init__(self) -> None:
        self._queue: list[Any] = []

    # ------------------------------------------------------------------
    # Narrative stream — bare string, no JSON wrapping.
    # ------------------------------------------------------------------

    def narrative(self, text: str) -> "CannedQueue":
        """Append one streaming narrative response.

        ``MockLLMClient.stream`` pops the next entry from the same queue
        as ``generate`` — a bare string is what the streaming bot beat
        consumes. Use one ``narrative()`` per assistant beat (primary,
        and optionally an interjection / second beat).
        """
        self._queue.append(text)
        return self

    def raw(self, value: str) -> "CannedQueue":
        """Append a raw string (escape hatch for non-classifier calls).

        Most tests should reach for the named helpers — this is here
        for one-offs the builder doesn't model yet.
        """
        self._queue.append(value)
        return self

    # ------------------------------------------------------------------
    # Turn parser — splits user prose into segments.
    # ------------------------------------------------------------------

    def parse_turn(
        self,
        *,
        segments: list[dict] | None = None,
        intent: str = "narrative",
        landing_state_hint: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``parse_turn`` classifier response.

        ``intent`` defaults to ``"narrative"``; pass ``"skip_elision"``
        or ``"skip_jump"`` to exercise the natural-language skip paths.
        ``landing_state_hint`` carries the residual descriptor for
        elision skips and is otherwise ignored.
        """
        payload: dict[str, Any] = {
            "segments": segments if segments is not None else [],
            "intent": intent,
            "landing_state_hint": landing_state_hint,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Multi-entity addressee classifier (T74.1).
    # ------------------------------------------------------------------

    def detect_addressee(
        self,
        *,
        addressee_id: str,
        confidence: str = "medium",
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_addressee`` classifier response."""
        payload: dict[str, Any] = {
            "addressee_id": addressee_id,
            "confidence": confidence,
            "reason": reason,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # State-update — one per directed edge per turn.
    # ------------------------------------------------------------------

    def state_update(
        self,
        *,
        affinity_delta: int = 0,
        trust_delta: int = 0,
        knowledge_facts: list | None = None,
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``apply_state_update`` classifier response.

        Defaults to a benign zero-delta payload — tests that don't care
        about state mutations can call this without arguments. One call
        is required per directed edge that fires after the assistant
        beat (e.g. single-bot non-guest turn = 2 calls; multi-bot guest
        turn = 6 calls).
        """
        payload: dict[str, Any] = {
            "affinity_delta": affinity_delta,
            "trust_delta": trust_delta,
            "knowledge_facts": (
                knowledge_facts if knowledge_facts is not None else []
            ),
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    def zero_state(self) -> "CannedQueue":
        """Alias for ``state_update()`` with all defaults — matches the
        ``_zero_state()`` helper in existing tests.
        """
        return self.state_update()

    # ------------------------------------------------------------------
    # Interjection (T74.2) — silent witness chimes in.
    # ------------------------------------------------------------------

    def detect_interjection(
        self,
        *,
        should_interject: bool,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_interjection`` classifier response."""
        payload: dict[str, Any] = {
            "should_interject": should_interject,
            "reason": reason,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    def detect_interjection_targeted(
        self,
        *,
        targeted: bool,
        target_id: str | None = None,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one targeted-interjection classifier response."""
        payload: dict[str, Any] = {
            "targeted": targeted,
            "target_id": target_id,
            "reason": reason,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Scene-close detector (T26).
    # ------------------------------------------------------------------

    def detect_scene_close(
        self,
        *,
        should_close: bool,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one ``detect_scene_close`` classifier response."""
        payload: dict[str, Any] = {
            "should_close": should_close,
            "reason": reason,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Event lifecycle (T52, T61) — per-turn transitions.
    # ------------------------------------------------------------------

    def detect_event_transitions(
        self,
        transitions: list[dict] | None = None,
    ) -> "CannedQueue":
        """Append one ``detect_event_transitions`` classifier response.

        ``transitions`` is a list of ``{"event_id": ..., "new_status":
        "active"|"completed"|"cancelled", "reason": ...}`` dicts. Pass
        an empty list (or omit the argument) to assert that the call
        ran but produced no transitions; pass ``None`` for an empty
        list with the same shape.

        Note: when no events are seeded, ``detect_event_transitions``
        short-circuits without an LLM call — in that case do NOT append
        this slot.
        """
        payload = {"transitions": transitions if transitions is not None else []}
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Per-POV scene summary (used after scene close).
    # ------------------------------------------------------------------

    def summarize_scene_pov(
        self,
        *,
        summary: str,
        knowledge_facts: list | None = None,
        relationship_summary: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one per-POV scene-summary response.

        Used by ``apply_scene_close_summary`` — one call per witness
        once a scene closes.
        """
        payload: dict[str, Any] = {
            "summary": summary,
            "knowledge_facts": (
                knowledge_facts if knowledge_facts is not None else []
            ),
            "relationship_summary": relationship_summary,
        }
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Thread detection (Phase 3 §3.3).
    # ------------------------------------------------------------------

    def detect_threads(
        self,
        candidates: list[dict] | None = None,
    ) -> "CannedQueue":
        """Append one ``detect_threads`` classifier response.

        ``candidates`` is a list of ``{"action": "open"|"update",
        "title": ..., "summary": ..., "existing_thread_id": ...}`` dicts.
        """
        payload = {"candidates": candidates if candidates is not None else []}
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Meanwhile digest — narrative summary of what happened off-screen.
    # ------------------------------------------------------------------

    def meanwhile_digest(self, summary: str) -> "CannedQueue":
        """Append one meanwhile-digest narrative response.

        The digest service streams the digest as plain text (not JSON)
        so this is a thin wrapper over ``narrative``/``raw`` for
        readability at the call site.
        """
        self._queue.append(summary)
        return self

    # ------------------------------------------------------------------
    # Significance scorer (background worker; rarely hit in unit tests
    # but available for completeness).
    # ------------------------------------------------------------------

    def score_significance(
        self,
        *,
        score: float = 0.0,
        reason: str = "",
        **rest: Any,
    ) -> "CannedQueue":
        """Append one significance-scoring classifier response."""
        payload: dict[str, Any] = {"score": score, "reason": reason}
        payload.update(rest)
        self._queue.append(payload)
        return self

    # ------------------------------------------------------------------
    # Build / introspection.
    # ------------------------------------------------------------------

    def build(self) -> list[str]:
        """Return the flat ``list[str]`` queue for ``MockLLMClient``.

        Dict items are JSON-encoded; string items are passed through
        verbatim (so streaming responses retain their raw form).
        """
        out: list[str] = []
        for item in self._queue:
            if isinstance(item, str):
                out.append(item)
            else:
                out.append(json.dumps(item))
        return out

    def __len__(self) -> int:
        return len(self._queue)