90 lines
3.1 KiB
Python
90 lines
3.1 KiB
Python
"""Tests for the interjection classifier service (T39).

Per Requirements §6.2, when a guest is present and the addressee bot has
just spoken, the *non-addressee* bot may interject with a brief follow-on
beat. The classifier wrapped here decides whether that interjection
should fire. The default bias is strongly toward False — the addressee
has the floor — so an interjection only fires when the silent witness's
character would plausibly speak up.

These tests cover:

* The classifier returning ``should_interject=True`` is honored.
* The classifier returning ``should_interject=False`` is honored.
* Repeated invalid JSON exhausts the classifier retries and falls back
  to ``should_interject=False`` with ``reason="fallback"``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.interjection import (
|
|
InterjectionDecision,
|
|
detect_interjection,
|
|
)
|
|
|
|
|
|
def _kwargs() -> dict:
    """Build a fully populated keyword-argument set for ``detect_interjection``.

    A new dict (with new nested edge dicts) is created on every call, so
    each test receives its own independent copy.
    """
    # How the silent witness (Bob) relates to the addressee (Alice).
    edge_to_addressee = {
        "affinity": 40,
        "trust": 30,
        "summary": "old rival; mild distrust",
    }
    # How the silent witness (Bob) relates to the user.
    edge_to_you = {
        "affinity": 70,
        "trust": 80,
        "summary": "long-time confidant",
    }
    return {
        "classifier_model": "x",
        "addressee_name": "Alice",
        "addressee_just_said": "I think we should leave now.",
        "silent_witness_name": "Bob",
        "silent_witness_persona": "Skeptical engineer, blunt, protective of the user.",
        "silent_witness_edge_to_addressee": edge_to_addressee,
        "silent_witness_edge_to_you": edge_to_you,
        "you_just_said": "Where do you both think we should go?",
    }
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_interjection_returns_true_when_classifier_decides_yes():
    """A canned "yes" verdict comes back as a decision with the same reason."""
    # Arrange: the mock LLM replies with a single well-formed JSON verdict.
    verdict = {"should_interject": True, "reason": "jealousy"}
    llm = MockLLMClient(canned=[json.dumps(verdict)])

    # Act
    decision = await detect_interjection(llm, **_kwargs())

    # Assert
    assert isinstance(decision, InterjectionDecision)
    assert decision.should_interject is True
    assert decision.reason == "jealousy"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_interjection_returns_false_when_classifier_decides_no():
    """A canned "no" verdict comes back as a decision with the same reason."""
    # Arrange: the mock LLM replies with a single well-formed JSON verdict.
    verdict = {"should_interject": False, "reason": "addressee has the floor"}
    llm = MockLLMClient(canned=[json.dumps(verdict)])

    # Act
    decision = await detect_interjection(llm, **_kwargs())

    # Assert
    assert isinstance(decision, InterjectionDecision)
    assert decision.should_interject is False
    assert decision.reason == "addressee has the floor"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_interjection_falls_back_to_false_on_classifier_failure():
    """Unparseable classifier output degrades to the fallback decision.

    ``classify`` is documented to retry 3 times, so three non-JSON replies
    are queued to use up every attempt. The decision returned afterwards
    must be ``should_interject=False`` with ``reason="fallback"``; that
    reason lets callers tell a classifier-degraded "no" apart from a
    genuine one.
    """
    # Arrange: every queued reply is invalid JSON.
    bad_replies = ["this is not json", "still not json", "still not json"]
    llm = MockLLMClient(canned=bad_replies)

    # Act
    decision = await detect_interjection(llm, **_kwargs())

    # Assert
    assert isinstance(decision, InterjectionDecision)
    assert decision.should_interject is False
    assert decision.reason == "fallback"
|