diff --git a/chat/services/addressee.py b/chat/services/addressee.py
index e085d79..1cf1199 100644
--- a/chat/services/addressee.py
+++ b/chat/services/addressee.py
@@ -22,6 +22,8 @@ from a fallback.
 
 from __future__ import annotations
 
+from typing import Literal
+
 from pydantic import BaseModel
 
 from chat.llm.classify import classify
@@ -39,7 +41,7 @@ class AddresseeDecision(BaseModel):
     """
 
     addressee_id: str
-    confidence: str = "medium"  # "high" | "medium" | "low"
+    confidence: Literal["high", "medium", "low"] = "medium"
     reason: str = ""
 
 
diff --git a/tests/test_addressee.py b/tests/test_addressee.py
index 71954cf..bb90d1b 100644
--- a/tests/test_addressee.py
+++ b/tests/test_addressee.py
@@ -97,3 +97,38 @@ async def test_classifier_failure_falls_back_to_host():
     assert result.addressee_id == "bot_a"
     assert result.reason == "fallback"
     assert result.confidence == "low"
+
+
+@pytest.mark.asyncio
+async def test_invalid_confidence_value_falls_back_to_default():
+    """Pydantic rejects ``confidence`` values outside the literal set
+    (``high`` / ``medium`` / ``low``). After the retry budget is
+    exhausted, classify returns the configured fallback default —
+    here that's ``confidence="low"`` with ``reason="fallback"``.
+    """
+    canned = [
+        json.dumps(
+            {
+                "addressee_id": "bot_a",
+                "confidence": "VERY_HIGH",  # not one of "high" / "medium" / "low"
+                "reason": "out-of-range value",
+            }
+        ),
+        "still_bad",  # bare string rather than a JSON object like the first reply
+        "still_bad",
+    ]
+    client = MockLLMClient(canned=canned)
+
+    result = await detect_addressee(
+        client,
+        classifier_model="test-model",
+        user_prose="anything",
+        host_id="bot_a",
+        host_name="BotA",
+        guest_id="bot_b",
+        guest_name="BotB",
+    )
+
+    assert result.addressee_id == "bot_a"
+    assert result.confidence == "low"
+    assert result.reason == "fallback"