135 lines
3.7 KiB
Python
135 lines
3.7 KiB
Python
"""Addressee classifier service tests (T74.1).
|
|
|
|
Covers :func:`chat.services.addressee.detect_addressee`:
|
|
|
|
- Classifier picks the guest -> ``addressee_id == guest_id``.
|
|
- Classifier picks the host -> ``addressee_id == host_id``.
|
|
- Classifier flakes (3 bad-JSON responses, exhausting the built-in
|
|
retry budget in :func:`chat.llm.classify.classify`) -> fallback to
|
|
the host with ``reason="fallback"``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.addressee import AddresseeDecision, detect_addressee
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_classifier_picks_guest():
    """A well-formed reply naming the guest is surfaced unchanged.

    The mock client is primed with a single JSON reply that selects
    ``bot_b`` (the guest) with ``high`` confidence; the returned
    decision must carry the same id and confidence.
    """
    # The only reply the mock LLM will hand back.
    guest_reply = {
        "addressee_id": "bot_b",
        "confidence": "high",
        "reason": "user named BotB",
    }
    llm = MockLLMClient(canned=[json.dumps(guest_reply)])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="BotB, what do you think?",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert isinstance(decision, AddresseeDecision)
    assert decision.addressee_id == "bot_b"
    assert decision.confidence == "high"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_classifier_picks_host():
    """A well-formed reply naming the host is surfaced unchanged.

    The canned reply selects ``bot_a`` (the host) with ``medium``
    confidence; both fields must appear on the returned decision.
    """
    llm = MockLLMClient(
        canned=[
            json.dumps(
                {
                    "addressee_id": "bot_a",
                    "confidence": "medium",
                    "reason": "narration aimed at host",
                }
            )
        ]
    )

    # Keyword arguments for the call, gathered in one place for readability.
    call_kwargs = {
        "classifier_model": "test-model",
        "user_prose": "I lean back and stretch.",
        "host_id": "bot_a",
        "host_name": "BotA",
        "guest_id": "bot_b",
        "guest_name": "BotB",
    }
    decision = await detect_addressee(llm, **call_kwargs)

    assert decision.addressee_id == "bot_a"
    assert decision.confidence == "medium"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_classifier_failure_falls_back_to_host():
    """Unparseable classifier output yields the host fallback decision.

    The mock client serves three non-JSON replies (per the module
    docstring, enough to exhaust the classifier's retry budget). The
    decision must then name the host, with ``reason="fallback"`` and
    ``confidence="low"``.
    """
    # None of these strings is valid JSON.
    llm = MockLLMClient(canned=["not json", "still not json", "garbage"])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="anything",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.reason == "fallback"
    assert decision.confidence == "low"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_invalid_confidence_value_falls_back_to_default():
    """An out-of-range ``confidence`` ends in the fallback decision.

    The first canned reply is valid JSON but carries
    ``confidence="VERY_HIGH"``, which lies outside the accepted
    ``high`` / ``medium`` / ``low`` set; the two replies after it are
    not JSON at all. The decision is expected to be the fallback:
    host id, ``confidence="low"``, ``reason="fallback"``.
    """
    # Parses as JSON, but its confidence value is not an allowed literal.
    bad_confidence_reply = json.dumps(
        {
            "addressee_id": "bot_a",
            "confidence": "VERY_HIGH",
            "reason": "out-of-range value",
        }
    )
    replies = [bad_confidence_reply, "still_bad", "still_bad"]
    llm = MockLLMClient(canned=replies)

    call_kwargs = {
        "classifier_model": "test-model",
        "user_prose": "anything",
        "host_id": "bot_a",
        "host_name": "BotA",
        "guest_id": "bot_b",
        "guest_name": "BotB",
    }
    decision = await detect_addressee(llm, **call_kwargs)

    assert decision.addressee_id == "bot_a"
    assert decision.confidence == "low"
    assert decision.reason == "fallback"
|