feat: classifier-based addressee detection (T74.1)
Replace the substring _detect_addressee_id helper with a classifier call for the multi-entity case. The substring helper is kept as a fast-path for the no-guest case (no LLM round-trip needed when only one bot is present, preserves throughput). - New service chat/services/addressee.py wrapping the existing classifier wrapper. AddresseeDecision carries addressee_id + confidence + reason; classifier failure falls back to the host with reason="fallback" (graceful-degradation, matches the relationship_seed / interjection pattern). - chat/web/turns.py post_turn now calls detect_addressee in the multi-entity branch; 1:1 keeps the substring path. - tests/test_addressee.py: 3 new tests (guest pick, host pick, classifier-failure fallback). - tests/test_turn_flow.py: existing multi-entity tests now feed a canned addressee response in the queue. The addressee-routing test is updated to assert classifier-driven routing rather than substring.
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
"""Addressee classifier service (T74.1).
|
||||
|
||||
Phase 2 (T44) detected the addressee — host vs. guest — with a simple
|
||||
case-insensitive whole-word substring match against the bots' names.
|
||||
That worked for the obvious case ("BotB, what do you think?") but lost
|
||||
the long tail: pronouns, paraphrases, indirect address, narrative
|
||||
focus on a particular party. T74.1 swaps the substring helper for a
|
||||
classifier call that reads the prose holistically.
|
||||
|
||||
The substring helper in :mod:`chat.web.turns` is kept as a fast-path
|
||||
for the no-guest case (only one bot present means there is nothing to
|
||||
classify) and as a non-breaking fallback for the regenerate path. The
|
||||
multi-entity branch in :func:`chat.web.turns.post_turn` calls
|
||||
:func:`detect_addressee` from this module.
|
||||
|
||||
Failure mode: classifier flake or low-confidence response degrades to
|
||||
the host (the default speaker per Phase 2's host-keeps-the-floor
|
||||
bias). The decision carries ``confidence`` and ``reason`` so callers
|
||||
that want to log degraded decisions can distinguish a real "host" call
|
||||
from a fallback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from chat.llm.classify import classify
|
||||
from chat.llm.client import LLMClient
|
||||
|
||||
|
||||
class AddresseeDecision(BaseModel):
|
||||
"""Which present bot the user is addressing.
|
||||
|
||||
``addressee_id`` is the chosen bot's id. ``confidence`` is one of
|
||||
``"high"`` / ``"medium"`` / ``"low"`` — callers may treat ``"low"``
|
||||
as a soft fallback to the host. ``reason`` is a short free-form
|
||||
string. The classifier-failure fallback uses ``reason="fallback"``
|
||||
so it's distinguishable from a real low-confidence call.
|
||||
"""
|
||||
|
||||
addressee_id: str
|
||||
confidence: str = "medium" # "high" | "medium" | "low"
|
||||
reason: str = ""
|
||||
|
||||
|
||||
_SYSTEM = (
|
||||
"Given a user's turn prose and the names of present bots, decide "
|
||||
"which bot the user is addressing. If the user is speaking to no "
|
||||
"specific bot (descriptive narration, action without dialogue), "
|
||||
"default to the host. Output strict JSON matching the schema. "
|
||||
"The addressee_id MUST be one of the ids supplied in the user "
|
||||
"message — do not invent ids."
|
||||
)
|
||||
|
||||
|
||||
async def detect_addressee(
|
||||
client: LLMClient,
|
||||
*,
|
||||
classifier_model: str,
|
||||
user_prose: str,
|
||||
host_id: str,
|
||||
host_name: str,
|
||||
guest_id: str | None,
|
||||
guest_name: str | None,
|
||||
timeout_s: float = 30.0,
|
||||
) -> AddresseeDecision:
|
||||
"""Classify which present bot the user is addressing.
|
||||
|
||||
Defaults to host on classifier failure or when the classifier picks
|
||||
an id that isn't one of the supplied ids. The caller is expected to
|
||||
only invoke this in the multi-entity case (a guest is present);
|
||||
when no guest is present the substring fast-path in
|
||||
:mod:`chat.web.turns` is used instead and this function is not
|
||||
called.
|
||||
"""
|
||||
fallback = AddresseeDecision(
|
||||
addressee_id=host_id, confidence="low", reason="fallback"
|
||||
)
|
||||
user = (
|
||||
f"Host: {host_name} (id={host_id})\n"
|
||||
+ (
|
||||
f"Guest: {guest_name} (id={guest_id})\n"
|
||||
if guest_id is not None
|
||||
else ""
|
||||
)
|
||||
+ f"\nUser prose:\n{user_prose}"
|
||||
)
|
||||
decision = await classify(
|
||||
client,
|
||||
model=classifier_model,
|
||||
system=_SYSTEM,
|
||||
user=user,
|
||||
schema=AddresseeDecision,
|
||||
default=fallback,
|
||||
timeout_s=timeout_s,
|
||||
)
|
||||
# Defensive: if the classifier returned an id outside the supplied
|
||||
# set, treat it as a fallback to the host. This catches pathological
|
||||
# outputs that pass schema validation but pick a phantom id.
|
||||
valid_ids = {host_id}
|
||||
if guest_id is not None:
|
||||
valid_ids.add(guest_id)
|
||||
if decision.addressee_id not in valid_ids:
|
||||
return fallback
|
||||
return decision
|
||||
|
||||
|
||||
__all__ = ["AddresseeDecision", "detect_addressee"]
|
||||
+20
-2
@@ -55,6 +55,7 @@ from fastapi import APIRouter, Depends, Form, HTTPException, Request
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse, Response
|
||||
|
||||
from chat.eventlog.log import append_and_apply, append_event
|
||||
from chat.services.addressee import detect_addressee
|
||||
from chat.services.background import SignificanceJob
|
||||
from chat.services.interjection import detect_interjection
|
||||
from chat.services.memory_write import record_turn_memory_for_present
|
||||
@@ -262,8 +263,25 @@ async def post_turn(
|
||||
|
||||
# 3. Determine the addressee. Done before assistant_turn_started so the
|
||||
# placeholder reflects the bot the user is actually talking to (host
|
||||
# in 1:1, host-or-guest in multi-entity).
|
||||
addressee_id = _detect_addressee_id(prose, host_bot, guest_bot)
|
||||
# in 1:1, host-or-guest in multi-entity). T74.1 routes the multi-entity
|
||||
# case through the addressee classifier; the no-guest case still uses
|
||||
# the substring fast-path because there is nothing to classify when
|
||||
# only one bot is present (and a classifier round-trip there would
|
||||
# just be throughput overhead).
|
||||
if guest_bot is None:
|
||||
addressee_id = _detect_addressee_id(prose, host_bot, guest_bot)
|
||||
else:
|
||||
decision = await detect_addressee(
|
||||
client,
|
||||
classifier_model=settings.classifier_model,
|
||||
user_prose=prose,
|
||||
host_id=host_bot["id"],
|
||||
host_name=host_bot["name"],
|
||||
guest_id=guest_bot["id"],
|
||||
guest_name=guest_bot["name"],
|
||||
timeout_s=settings.classifier_timeout_s,
|
||||
)
|
||||
addressee_id = decision.addressee_id
|
||||
addressee_bot = (
|
||||
guest_bot if (guest_bot is not None and addressee_id == guest_bot["id"])
|
||||
else host_bot
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
"""Addressee classifier service tests (T74.1).
|
||||
|
||||
Covers :func:`chat.services.addressee.detect_addressee`:
|
||||
|
||||
- Classifier picks the guest -> ``addressee_id == guest_id``.
|
||||
- Classifier picks the host -> ``addressee_id == host_id``.
|
||||
- Classifier flakes (3 bad-JSON responses, exhausting the built-in
|
||||
retry budget in :func:`chat.llm.classify.classify`) -> fallback to
|
||||
the host with ``reason="fallback"``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from chat.llm.mock import MockLLMClient
|
||||
from chat.services.addressee import AddresseeDecision, detect_addressee
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_classifier_picks_guest():
|
||||
"""Classifier returns the guest id verbatim — caller propagates it."""
|
||||
canned = [
|
||||
json.dumps(
|
||||
{
|
||||
"addressee_id": "bot_b",
|
||||
"confidence": "high",
|
||||
"reason": "user named BotB",
|
||||
}
|
||||
)
|
||||
]
|
||||
client = MockLLMClient(canned=canned)
|
||||
|
||||
result = await detect_addressee(
|
||||
client,
|
||||
classifier_model="test-model",
|
||||
user_prose="BotB, what do you think?",
|
||||
host_id="bot_a",
|
||||
host_name="BotA",
|
||||
guest_id="bot_b",
|
||||
guest_name="BotB",
|
||||
)
|
||||
|
||||
assert isinstance(result, AddresseeDecision)
|
||||
assert result.addressee_id == "bot_b"
|
||||
assert result.confidence == "high"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_classifier_picks_host():
|
||||
"""Classifier returns the host id — caller propagates it."""
|
||||
canned = [
|
||||
json.dumps(
|
||||
{
|
||||
"addressee_id": "bot_a",
|
||||
"confidence": "medium",
|
||||
"reason": "narration aimed at host",
|
||||
}
|
||||
)
|
||||
]
|
||||
client = MockLLMClient(canned=canned)
|
||||
|
||||
result = await detect_addressee(
|
||||
client,
|
||||
classifier_model="test-model",
|
||||
user_prose="I lean back and stretch.",
|
||||
host_id="bot_a",
|
||||
host_name="BotA",
|
||||
guest_id="bot_b",
|
||||
guest_name="BotB",
|
||||
)
|
||||
|
||||
assert result.addressee_id == "bot_a"
|
||||
assert result.confidence == "medium"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_classifier_failure_falls_back_to_host():
|
||||
"""Three bad-JSON responses exhaust the retry budget and the
|
||||
classifier-failure fallback returns ``host_id`` with
|
||||
``reason="fallback"``."""
|
||||
canned = ["not json", "still not json", "garbage"]
|
||||
client = MockLLMClient(canned=canned)
|
||||
|
||||
result = await detect_addressee(
|
||||
client,
|
||||
classifier_model="test-model",
|
||||
user_prose="anything",
|
||||
host_id="bot_a",
|
||||
host_name="BotA",
|
||||
guest_id="bot_b",
|
||||
guest_name="BotB",
|
||||
)
|
||||
|
||||
assert result.addressee_id == "bot_a"
|
||||
assert result.reason == "fallback"
|
||||
assert result.confidence == "low"
|
||||
+53
-26
@@ -405,14 +405,15 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
|
||||
1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates + 2
|
||||
memory_written events. Single turn_html broadcast.
|
||||
|
||||
Canned queue (8 calls):
|
||||
Canned queue (11 calls):
|
||||
1. parse_turn
|
||||
2. narrative stream (primary, addressee = host because the prose
|
||||
2. detect_addressee (T74.1) -> host
|
||||
3. narrative stream (primary, addressee = host because the prose
|
||||
doesn't name the guest)
|
||||
3-8. 6 state-update calls (one per directed pair across {you,
|
||||
4-9. 6 state-update calls (one per directed pair across {you,
|
||||
bot_a, bot_b})
|
||||
9. detect_interjection -> should_interject=False
|
||||
10. detect_scene_close -> should_close=False
|
||||
10. detect_interjection -> should_interject=False
|
||||
11. detect_scene_close -> should_close=False
|
||||
"""
|
||||
_seed_chat_with_guest(tmp_path / "test.db")
|
||||
canned_parse = json.dumps(
|
||||
@@ -420,6 +421,9 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
|
||||
)
|
||||
canned = [
|
||||
canned_parse,
|
||||
json.dumps(
|
||||
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
|
||||
),
|
||||
"Greetings.",
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
@@ -474,14 +478,15 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
|
||||
1 user_turn + 2 assistant_turns + (6 + 6) post-turn edge_updates +
|
||||
4 memory_written events.
|
||||
|
||||
Canned queue (16 calls):
|
||||
Canned queue (17 calls):
|
||||
1. parse_turn
|
||||
2. narrative stream (primary)
|
||||
3-8. 6 state-update calls (post-primary)
|
||||
9. detect_interjection -> should_interject=True
|
||||
10. narrative stream (interjection)
|
||||
11-16. 6 state-update calls (post-interjection)
|
||||
17. detect_scene_close -> should_close=False
|
||||
2. detect_addressee (T74.1) -> host
|
||||
3. narrative stream (primary)
|
||||
4-9. 6 state-update calls (post-primary)
|
||||
10. detect_interjection -> should_interject=True
|
||||
11. narrative stream (interjection)
|
||||
12-17. 6 state-update calls (post-interjection)
|
||||
18. detect_scene_close -> should_close=False
|
||||
"""
|
||||
_seed_chat_with_guest(tmp_path / "test.db")
|
||||
canned_parse = json.dumps(
|
||||
@@ -489,6 +494,9 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
|
||||
)
|
||||
canned = [
|
||||
canned_parse,
|
||||
json.dumps(
|
||||
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
|
||||
),
|
||||
"Primary beat.",
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
@@ -555,14 +563,15 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
|
||||
rewrites fire for both bots (memory.pov_summary changes for each).
|
||||
Interjection short-circuits at False so the queue stays compact.
|
||||
|
||||
Canned queue (12 calls):
|
||||
Canned queue (13 calls):
|
||||
1. parse_turn
|
||||
2. narrative stream (primary)
|
||||
3-8. 6 state-update calls
|
||||
9. detect_interjection -> False (no follow-on stream)
|
||||
10. detect_scene_close -> True
|
||||
11. apply_scene_close_summary host POV
|
||||
12. apply_scene_close_summary guest POV
|
||||
2. detect_addressee (T74.1) -> host
|
||||
3. narrative stream (primary)
|
||||
4-9. 6 state-update calls
|
||||
10. detect_interjection -> False (no follow-on stream)
|
||||
11. detect_scene_close -> True
|
||||
12. apply_scene_close_summary host POV
|
||||
13. apply_scene_close_summary guest POV
|
||||
"""
|
||||
_seed_chat_with_guest(tmp_path / "test.db")
|
||||
canned_parse = json.dumps(
|
||||
@@ -588,6 +597,9 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
|
||||
)
|
||||
canned = [
|
||||
canned_parse,
|
||||
json.dumps(
|
||||
{"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
|
||||
),
|
||||
"Goodnight.",
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
@@ -639,12 +651,20 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
|
||||
|
||||
|
||||
def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
|
||||
"""Prose that names the guest by name routes the primary turn to the
|
||||
guest. Interjection (when fired) makes the host the silent witness
|
||||
and the second assistant_turn carries the host as speaker.
|
||||
"""T74.1: the multi-entity addressee call goes through the classifier;
|
||||
when the classifier returns the guest, the primary turn routes there.
|
||||
Interjection (when fired) makes the host the silent witness and the
|
||||
second assistant_turn carries the host as speaker.
|
||||
|
||||
Canned queue: same shape as the with-interjection test (16 calls)
|
||||
plus the trailing scene_close decision.
|
||||
Canned queue (with classifier-led addressee = guest):
|
||||
1. parse_turn
|
||||
2. detect_addressee -> bot_b (the guest)
|
||||
3. narrative stream (primary, addressee = guest)
|
||||
4-9. 6 state-update calls
|
||||
10. detect_interjection -> True
|
||||
11. interjection narrative stream
|
||||
12-17. 6 state-update calls (post-interjection)
|
||||
18. detect_scene_close -> False
|
||||
"""
|
||||
_seed_chat_with_guest(tmp_path / "test.db")
|
||||
canned_parse = json.dumps(
|
||||
@@ -652,6 +672,13 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
|
||||
)
|
||||
canned = [
|
||||
canned_parse,
|
||||
json.dumps(
|
||||
{
|
||||
"addressee_id": "bot_b",
|
||||
"confidence": "high",
|
||||
"reason": "user named BotB",
|
||||
}
|
||||
),
|
||||
"BotB pondering.",
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
_zero_state(), _zero_state(), _zero_state(),
|
||||
@@ -680,8 +707,8 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
|
||||
primary_payload = json.loads(rows[0][0])
|
||||
interjection_payload = json.loads(rows[1][0])
|
||||
|
||||
# Primary speaker is the guest because the prose names BotB and not
|
||||
# BotA (case-insensitive whole-word match).
|
||||
# Primary speaker is the guest because the addressee classifier
|
||||
# picked bot_b for the prose ("BotB, what do you think?").
|
||||
assert primary_payload["speaker_id"] == "bot_b"
|
||||
# Interjection follow-on goes to the silent witness — the host.
|
||||
assert interjection_payload["speaker_id"] == "bot_a"
|
||||
|
||||
Reference in New Issue
Block a user