# chat/chat/services/interjection.py

"""Interjection classifier service (T39).

Per Requirements §6.2, when a guest is present and the addressee bot has
just spoken, the *non-addressee* bot may follow on with a brief
interjection beat. This service decides whether that interjection
fires. Conservative bias: most turns return ``should_interject=False``
— the addressee has the floor and an interjection is the exception.
Trigger ``True`` only when the silent witness's character, given their
persona and edges, would plausibly speak up: jealousy, surprise, strong
agreement worth voicing, correcting a factual falsehood, urgency.

T44 (turn flow) calls this and, on ``True``, generates the brief
follow-on response as the silent witness. Classifier failure falls back
to ``should_interject=False`` with ``reason="fallback"`` so the chat
keeps moving (§3.3 graceful-degradation rule); callers that care can
distinguish a real "no" from a degraded "no" by the reason string.
"""

from __future__ import annotations

from pydantic import BaseModel

from chat.llm.classify import classify
from chat.llm.client import LLMClient


class InterjectionDecision(BaseModel):
    """Whether the silent witness interjects, plus a short reason.

    Used both as the response schema handed to the classifier and as the
    return type of ``detect_interjection``.

    Defaults are a deliberate no-op: ``should_interject=False`` with an
    empty reason. The classifier-failure fallback uses
    ``reason="fallback"`` so it's distinguishable from a real "no".
    """

    # True only when the silent witness should follow on with an
    # interjection; defaults to False (the addressee keeps the floor).
    should_interject: bool = False
    # Short justification for the decision. Empty by default; the value
    # "fallback" marks a degraded "no" produced when classification failed.
    reason: str = ""


# System prompt for the interjection classifier. One entry per sentence,
# joined with single spaces: it biases hard toward "no" and lists the few
# motives that justify the silent witness speaking up.
_SYSTEM = " ".join(
    [
        "You decide whether a silent witness character interjects after "
        "the addressee character finishes speaking.",
        "STRONGLY default to false — the addressee has the floor and most "
        "turns should NOT have an interjection.",
        "Only return true when the silent witness's character, given their "
        "persona and edges, would plausibly speak up: jealousy, surprise, "
        "strong agreement worth voicing, correcting a factual falsehood, "
        "urgency.",
        "Output strict JSON matching the schema.",
    ]
)


def _edge_line(source: str, target: str, edge: dict) -> str:
    """Render one directed edge as a single prompt line (no newline).

    Keys absent from ``edge`` fall back to ``affinity=50``, ``trust=50``
    and an empty ``summary``.
    """
    affinity = edge.get("affinity", 50)
    trust = edge.get("trust", 50)
    summary = edge.get("summary", "")
    return (
        f"Edge {source} -> {target}: "
        f"affinity={affinity}, trust={trust}, summary={summary}"
    )


async def detect_interjection(
    client: LLMClient,
    *,
    classifier_model: str,
    addressee_name: str,
    addressee_just_said: str,
    silent_witness_name: str,
    silent_witness_persona: str,
    silent_witness_edge_to_addressee: dict,  # {affinity, trust, summary}
    silent_witness_edge_to_you: dict,
    you_just_said: str,
    timeout_s: float = 30.0,
) -> InterjectionDecision:
    """Ask the classifier whether the silent witness should interject.

    Builds a user prompt from the user's last message, the addressee's
    reply, the silent witness's name and persona, and the witness's two
    directed edges (toward the addressee and toward the user, "you"),
    then awaits ``classify`` with ``InterjectionDecision`` as the schema.

    Each edge dict is expected to look like
    ``{affinity: int, trust: int, summary: str}``. Any missing key is
    rendered with the 50/50 baseline (or an empty summary), so partially
    populated edge state does not raise.

    Returns:
        Whatever ``classify`` yields for the schema. The ``default``
        handed to it is ``should_interject=False`` with
        ``reason="fallback"``, the degraded "no" described in the module
        docstring.
    """
    witness = silent_witness_name

    # Empty entries become the blank lines that separate prompt sections.
    prompt_lines = [
        f"You said: {you_just_said}",
        "",
        f"{addressee_name} just said: {addressee_just_said}",
        "",
        f"Silent witness: {witness}",
        f"Persona: {silent_witness_persona}",
        _edge_line(witness, addressee_name, silent_witness_edge_to_addressee),
        _edge_line(witness, "you", silent_witness_edge_to_you),
        "",
        f"Should {witness} interject?",
    ]
    prompt = "\n".join(prompt_lines)

    # The reason string lets callers tell a degraded "no" from a real one.
    degraded_no = InterjectionDecision(should_interject=False, reason="fallback")

    decision = await classify(
        client,
        model=classifier_model,
        system=_SYSTEM,
        user=prompt,
        schema=InterjectionDecision,
        default=degraded_no,
        timeout_s=timeout_s,
    )
    return decision


# Public API of this module: the decision model and the classifier entry point.
__all__ = [
    "InterjectionDecision",
    "detect_interjection",
]