Files
chat/tests/test_addressee.py

135 lines
3.7 KiB
Python

"""Addressee classifier service tests (T74.1).
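Covers :func:`chat.services.addressee.detect_addressee`:

- Classifier picks the guest -> ``addressee_id == guest_id``.
- Classifier picks the host -> ``addressee_id == host_id``.
- Classifier flakes (3 bad-JSON responses, exhausting the built-in
  retry budget in :func:`chat.llm.classify.classify`) -> fallback to
  the host with ``reason="fallback"``.
- Classifier returns a ``confidence`` value outside ``high`` /
  ``medium`` / ``low`` and then two unparseable replies -> the same
  fallback to the host, with ``confidence="low"`` and
  ``reason="fallback"``.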
"""
from __future__ import annotations
import json
import pytest
from chat.llm.mock import MockLLMClient
from chat.services.addressee import AddresseeDecision, detect_addressee
@pytest.mark.asyncio
async def test_classifier_picks_guest():
    """A well-formed reply naming the guest is surfaced to the caller.

    The mock LLM is primed with a single JSON reply that selects
    ``bot_b`` (passed as ``guest_id``) at ``high`` confidence. The
    result must be an :class:`AddresseeDecision` carrying that same id
    and confidence.
    """
    guest_reply = {
        "addressee_id": "bot_b",
        "confidence": "high",
        "reason": "user named BotB",
    }
    llm = MockLLMClient(canned=[json.dumps(guest_reply)])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="BotB, what do you think?",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert isinstance(decision, AddresseeDecision)
    assert decision.addressee_id == "bot_b"
    assert decision.confidence == "high"
@pytest.mark.asyncio
async def test_classifier_picks_host():
    """A well-formed reply naming the host is surfaced to the caller.

    The mock LLM is primed with a single JSON reply that selects
    ``bot_a`` (passed as ``host_id``) at ``medium`` confidence; the
    decision must report that same id and confidence.
    """
    host_reply = {
        "addressee_id": "bot_a",
        "confidence": "medium",
        "reason": "narration aimed at host",
    }
    llm = MockLLMClient(canned=[json.dumps(host_reply)])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="I lean back and stretch.",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.confidence == "medium"
@pytest.mark.asyncio
async def test_classifier_failure_falls_back_to_host():
    """Unparseable classifier output falls back to the host.

    The mock LLM serves three replies, none of which is JSON. Once
    those three attempts are used up, the decision must name
    ``host_id`` with ``reason="fallback"`` and ``confidence="low"``.
    """
    unparseable_replies = ["not json", "still not json", "garbage"]
    llm = MockLLMClient(canned=unparseable_replies)

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="anything",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.reason == "fallback"
    assert decision.confidence == "low"
@pytest.mark.asyncio
async def test_invalid_confidence_value_falls_back_to_default():
    """An out-of-set ``confidence`` value ends in the fallback decision.

    The first canned reply is valid JSON, but its ``confidence``
    (``"VERY_HIGH"``) is not one of ``high`` / ``medium`` / ``low``, so
    schema validation rejects it. The two replies after it are not JSON
    at all. With every attempt rejected, the result must be the
    fallback default: the host id, ``confidence="low"`` and
    ``reason="fallback"``.
    """
    out_of_range_reply = json.dumps(
        {
            "addressee_id": "bot_a",
            "confidence": "VERY_HIGH",
            "reason": "out-of-range value",
        }
    )
    # One schema-invalid reply followed by two unparseable ones.
    replies = [out_of_range_reply, "still_bad", "still_bad"]
    llm = MockLLMClient(canned=replies)

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="anything",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.confidence == "low"
    assert decision.reason == "fallback"