feat: classifier-based addressee detection (T74.1)

Replace the substring _detect_addressee_id helper with a classifier
call for the multi-entity case. The substring helper is kept as a
fast-path for the no-guest case (no LLM round-trip needed when only
one bot is present, preserves throughput).

- New service chat/services/addressee.py wrapping the existing
  classifier wrapper. AddresseeDecision carries addressee_id +
  confidence + reason; classifier failure falls back to the host with
  reason="fallback" (graceful-degradation, matches the relationship_seed
  / interjection pattern).
- chat/web/turns.py post_turn now calls detect_addressee in the
  multi-entity branch; 1:1 keeps the substring path.
- tests/test_addressee.py: 3 new tests (guest pick, host pick,
  classifier-failure fallback).
- tests/test_turn_flow.py: existing multi-entity tests now feed a
  canned addressee response in the queue. The addressee-routing test
  is updated to assert classifier-driven routing rather than substring.
This commit is contained in:
Joseph Doherty
2026-04-26 17:37:26 -04:00
parent e632a6247d
commit c874883a84
4 changed files with 280 additions and 28 deletions
+99
View File
@@ -0,0 +1,99 @@
"""Addressee classifier service tests (T74.1).
Covers :func:`chat.services.addressee.detect_addressee`:
- Classifier picks the guest -> ``addressee_id == guest_id``.
- Classifier picks the host -> ``addressee_id == host_id``.
- Classifier flakes (3 bad-JSON responses, exhausting the built-in
retry budget in :func:`chat.llm.classify.classify`) -> fallback to
the host with ``reason="fallback"``.
"""
from __future__ import annotations
import json
import pytest
from chat.llm.mock import MockLLMClient
from chat.services.addressee import AddresseeDecision, detect_addressee
@pytest.mark.asyncio
async def test_classifier_picks_guest():
    """A classifier reply that names the guest is surfaced as the addressee."""
    # Single canned classifier reply: a JSON object selecting the guest.
    classifier_reply = {
        "addressee_id": "bot_b",
        "confidence": "high",
        "reason": "user named BotB",
    }
    llm = MockLLMClient(canned=[json.dumps(classifier_reply)])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="BotB, what do you think?",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    # The service hands back its decision type carrying the canned values.
    assert isinstance(decision, AddresseeDecision)
    assert decision.addressee_id == "bot_b"
    assert decision.confidence == "high"
@pytest.mark.asyncio
async def test_classifier_picks_host():
    """A classifier reply that names the host is surfaced as the addressee."""
    # Single canned classifier reply: a JSON object selecting the host.
    classifier_reply = {
        "addressee_id": "bot_a",
        "confidence": "medium",
        "reason": "narration aimed at host",
    }
    llm = MockLLMClient(canned=[json.dumps(classifier_reply)])

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="I lean back and stretch.",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.confidence == "medium"
@pytest.mark.asyncio
async def test_classifier_failure_falls_back_to_host():
    """Unparseable classifier output degrades to the host.

    Three non-JSON replies are queued (per the module docstring, enough to
    exhaust the classifier's retry budget); the decision must then carry
    ``host_id`` with ``reason="fallback"`` and low confidence.
    """
    unparseable_replies = ["not json", "still not json", "garbage"]
    llm = MockLLMClient(canned=unparseable_replies)

    decision = await detect_addressee(
        llm,
        classifier_model="test-model",
        user_prose="anything",
        host_id="bot_a",
        host_name="BotA",
        guest_id="bot_b",
        guest_name="BotB",
    )

    assert decision.addressee_id == "bot_a"
    assert decision.reason == "fallback"
    assert decision.confidence == "low"
+53 -26
View File
@@ -405,14 +405,15 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates + 2
memory_written events. Single turn_html broadcast.
Canned queue (8 calls):
Canned queue (11 calls):
1. parse_turn
2. narrative stream (primary, addressee = host because the prose
2. detect_addressee (T74.1) -> host
3. narrative stream (primary, addressee = host because the prose
doesn't name the guest)
3-8. 6 state-update calls (one per directed pair across {you,
4-9. 6 state-update calls (one per directed pair across {you,
bot_a, bot_b})
9. detect_interjection -> should_interject=False
10. detect_scene_close -> should_close=False
10. detect_interjection -> should_interject=False
11. detect_scene_close -> should_close=False
"""
_seed_chat_with_guest(tmp_path / "test.db")
canned_parse = json.dumps(
@@ -420,6 +421,9 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
)
canned = [
canned_parse,
json.dumps(
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
),
"Greetings.",
_zero_state(), _zero_state(), _zero_state(),
_zero_state(), _zero_state(), _zero_state(),
@@ -474,14 +478,15 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
1 user_turn + 2 assistant_turns + (6 + 6) post-turn edge_updates +
4 memory_written events.
Canned queue (16 calls):
Canned queue (18 calls):
1. parse_turn
2. narrative stream (primary)
3-8. 6 state-update calls (post-primary)
9. detect_interjection -> should_interject=True
10. narrative stream (interjection)
11-16. 6 state-update calls (post-interjection)
17. detect_scene_close -> should_close=False
2. detect_addressee (T74.1) -> host
3. narrative stream (primary)
4-9. 6 state-update calls (post-primary)
10. detect_interjection -> should_interject=True
11. narrative stream (interjection)
12-17. 6 state-update calls (post-interjection)
18. detect_scene_close -> should_close=False
"""
_seed_chat_with_guest(tmp_path / "test.db")
canned_parse = json.dumps(
@@ -489,6 +494,9 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
)
canned = [
canned_parse,
json.dumps(
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
),
"Primary beat.",
_zero_state(), _zero_state(), _zero_state(),
_zero_state(), _zero_state(), _zero_state(),
@@ -555,14 +563,15 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
rewrites fire for both bots (memory.pov_summary changes for each).
Interjection short-circuits at False so the queue stays compact.
Canned queue (12 calls):
Canned queue (13 calls):
1. parse_turn
2. narrative stream (primary)
3-8. 6 state-update calls
9. detect_interjection -> False (no follow-on stream)
10. detect_scene_close -> True
11. apply_scene_close_summary host POV
12. apply_scene_close_summary guest POV
2. detect_addressee (T74.1) -> host
3. narrative stream (primary)
4-9. 6 state-update calls
10. detect_interjection -> False (no follow-on stream)
11. detect_scene_close -> True
12. apply_scene_close_summary host POV
13. apply_scene_close_summary guest POV
"""
_seed_chat_with_guest(tmp_path / "test.db")
canned_parse = json.dumps(
@@ -588,6 +597,9 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
)
canned = [
canned_parse,
json.dumps(
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
),
"Goodnight.",
_zero_state(), _zero_state(), _zero_state(),
_zero_state(), _zero_state(), _zero_state(),
@@ -639,12 +651,20 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
"""Prose that names the guest by name routes the primary turn to the
guest. Interjection (when fired) makes the host the silent witness
and the second assistant_turn carries the host as speaker.
"""T74.1: the multi-entity addressee call goes through the classifier;
when the classifier returns the guest, the primary turn routes there.
Interjection (when fired) makes the host the silent witness and the
second assistant_turn carries the host as speaker.
Canned queue: same shape as the with-interjection test (16 calls)
plus the trailing scene_close decision.
Canned queue (with classifier-led addressee = guest):
1. parse_turn
2. detect_addressee -> bot_b (the guest)
3. narrative stream (primary, addressee = guest)
4-9. 6 state-update calls
10. detect_interjection -> True
11. interjection narrative stream
12-17. 6 state-update calls (post-interjection)
18. detect_scene_close -> False
"""
_seed_chat_with_guest(tmp_path / "test.db")
canned_parse = json.dumps(
@@ -652,6 +672,13 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
)
canned = [
canned_parse,
json.dumps(
{
"addressee_id": "bot_b",
"confidence": "high",
"reason": "user named BotB",
}
),
"BotB pondering.",
_zero_state(), _zero_state(), _zero_state(),
_zero_state(), _zero_state(), _zero_state(),
@@ -680,8 +707,8 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
primary_payload = json.loads(rows[0][0])
interjection_payload = json.loads(rows[1][0])
# Primary speaker is the guest because the prose names BotB and not
# BotA (case-insensitive whole-word match).
# Primary speaker is the guest because the addressee classifier
# picked bot_b for the prose ("BotB, what do you think?").
assert primary_payload["speaker_id"] == "bot_b"
# Interjection follow-on goes to the silent witness — the host.
assert interjection_payload["speaker_id"] == "bot_a"