From c874883a849aa2c5efadd317dec067b550f02f82 Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Sun, 26 Apr 2026 17:37:26 -0400
Subject: [PATCH 1/4] feat: classifier-based addressee detection (T74.1)

Replace the substring _detect_addressee_id helper with a classifier
call for the multi-entity case. The substring helper is kept as a
fast-path for the no-guest case (no LLM round-trip needed when only
one bot is present, preserves throughput).

- New service chat/services/addressee.py wrapping the existing
  classifier wrapper. AddresseeDecision carries addressee_id +
  confidence + reason; classifier failure falls back to the host with
  reason="fallback" (graceful-degradation, matches the relationship_seed
  / interjection pattern).
- chat/web/turns.py post_turn now calls detect_addressee in the
  multi-entity branch; 1:1 keeps the substring path.
- tests/test_addressee.py: 3 new tests (guest pick, host pick,
  classifier-failure fallback).
- tests/test_turn_flow.py: existing multi-entity tests now feed a
  canned addressee response in the queue. The addressee-routing test
  is updated to assert classifier-driven routing rather than substring.
---
 chat/services/addressee.py | 108 +++++++++++++++++++++++++++++++++++++
 chat/web/turns.py          |  22 +++++++-
 tests/test_addressee.py    |  99 ++++++++++++++++++++++++++++++++++
 tests/test_turn_flow.py    |  79 ++++++++++++++++++---------
 4 files changed, 280 insertions(+), 28 deletions(-)
 create mode 100644 chat/services/addressee.py
 create mode 100644 tests/test_addressee.py

diff --git a/chat/services/addressee.py b/chat/services/addressee.py
new file mode 100644
index 0000000..e085d79
--- /dev/null
+++ b/chat/services/addressee.py
@@ -0,0 +1,108 @@
+"""Addressee classifier service (T74.1).
+
+Phase 2 (T44) detected the addressee — host vs. guest — with a simple
+case-insensitive whole-word substring match against the bots' names.
+That worked for the obvious case ("BotB, what do you think?") but lost
+the long tail: pronouns, paraphrases, indirect address, narrative
+focus on a particular party. T74.1 swaps the substring helper for a
+classifier call that reads the prose holistically.
+
+The substring helper in :mod:`chat.web.turns` is kept as a fast-path
+for the no-guest case (only one bot present means there is nothing to
+classify) and as a non-breaking fallback for the regenerate path. The
+multi-entity branch in :func:`chat.web.turns.post_turn` calls
+:func:`detect_addressee` from this module.
+
+Failure mode: classifier flake or low-confidence response degrades to
+the host (the default speaker per Phase 2's host-keeps-the-floor
+bias). The decision carries ``confidence`` and ``reason`` so callers
+that want to log degraded decisions can distinguish a real "host" call
+from a fallback.
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+from chat.llm.classify import classify
+from chat.llm.client import LLMClient
+
+
+class AddresseeDecision(BaseModel):
+    """Which present bot the user is addressing.
+
+    ``addressee_id`` is the chosen bot's id. ``confidence`` is one of
+    ``"high"`` / ``"medium"`` / ``"low"`` — callers may treat ``"low"``
+    as a soft fallback to the host. ``reason`` is a short free-form
+    string. The classifier-failure fallback uses ``reason="fallback"``
+    so it's distinguishable from a real low-confidence call.
+    """
+
+    addressee_id: str
+    confidence: str = "medium"  # "high" | "medium" | "low"
+    reason: str = ""
+
+
+_SYSTEM = (
+    "Given a user's turn prose and the names of present bots, decide "
+    "which bot the user is addressing. If the user is speaking to no "
+    "specific bot (descriptive narration, action without dialogue), "
+    "default to the host. Output strict JSON matching the schema. "
+    "The addressee_id MUST be one of the ids supplied in the user "
+    "message — do not invent ids."
+)
+
+
+async def detect_addressee(
+    client: LLMClient,
+    *,
+    classifier_model: str,
+    user_prose: str,
+    host_id: str,
+    host_name: str,
+    guest_id: str | None,
+    guest_name: str | None,
+    timeout_s: float = 30.0,
+) -> AddresseeDecision:
+    """Classify which present bot the user is addressing.
+
+    Defaults to host on classifier failure or when the classifier picks
+    an id that isn't one of the supplied ids. The caller is expected to
+    only invoke this in the multi-entity case (a guest is present);
+    when no guest is present the substring fast-path in
+    :mod:`chat.web.turns` is used instead and this function is not
+    called.
+    """
+    fallback = AddresseeDecision(
+        addressee_id=host_id, confidence="low", reason="fallback"
+    )
+    user = (
+        f"Host: {host_name} (id={host_id})\n"
+        + (
+            f"Guest: {guest_name} (id={guest_id})\n"
+            if guest_id is not None
+            else ""
+        )
+        + f"\nUser prose:\n{user_prose}"
+    )
+    decision = await classify(
+        client,
+        model=classifier_model,
+        system=_SYSTEM,
+        user=user,
+        schema=AddresseeDecision,
+        default=fallback,
+        timeout_s=timeout_s,
+    )
+    # Defensive: if the classifier returned an id outside the supplied
+    # set, treat it as a fallback to the host. This catches pathological
+    # outputs that pass schema validation but pick a phantom id.
+    valid_ids = {host_id}
+    if guest_id is not None:
+        valid_ids.add(guest_id)
+    if decision.addressee_id not in valid_ids:
+        return fallback
+    return decision
+
+
+__all__ = ["AddresseeDecision", "detect_addressee"]
diff --git a/chat/web/turns.py b/chat/web/turns.py
index 940afbf..4309b8e 100644
--- a/chat/web/turns.py
+++ b/chat/web/turns.py
@@ -55,6 +55,7 @@ from fastapi import APIRouter, Depends, Form, HTTPException, Request
 from fastapi.responses import HTMLResponse, RedirectResponse, Response
 
 from chat.eventlog.log import append_and_apply, append_event
+from chat.services.addressee import detect_addressee
 from chat.services.background import SignificanceJob
 from chat.services.interjection import detect_interjection
 from chat.services.memory_write import record_turn_memory_for_present
@@ -262,8 +263,25 @@ async def post_turn(
 
     # 3. Determine the addressee. Done before assistant_turn_started so the
     # placeholder reflects the bot the user is actually talking to (host
-    # in 1:1, host-or-guest in multi-entity).
-    addressee_id = _detect_addressee_id(prose, host_bot, guest_bot)
+    # in 1:1, host-or-guest in multi-entity). T74.1 routes the multi-entity
+    # case through the addressee classifier; the no-guest case still uses
+    # the substring fast-path because there is nothing to classify when
+    # only one bot is present (and a classifier round-trip there would
+    # just be throughput overhead).
+    if guest_bot is None:
+        addressee_id = _detect_addressee_id(prose, host_bot, guest_bot)
+    else:
+        decision = await detect_addressee(
+            client,
+            classifier_model=settings.classifier_model,
+            user_prose=prose,
+            host_id=host_bot["id"],
+            host_name=host_bot["name"],
+            guest_id=guest_bot["id"],
+            guest_name=guest_bot["name"],
+            timeout_s=settings.classifier_timeout_s,
+        )
+        addressee_id = decision.addressee_id
     addressee_bot = (
         guest_bot if (guest_bot is not None and addressee_id == guest_bot["id"])
         else host_bot
diff --git a/tests/test_addressee.py b/tests/test_addressee.py
new file mode 100644
index 0000000..71954cf
--- /dev/null
+++ b/tests/test_addressee.py
@@ -0,0 +1,99 @@
+"""Addressee classifier service tests (T74.1).
+
+Covers :func:`chat.services.addressee.detect_addressee`:
+
+- Classifier picks the guest -> ``addressee_id == guest_id``.
+- Classifier picks the host -> ``addressee_id == host_id``.
+- Classifier flakes (3 bad-JSON responses, exhausting the built-in
+  retry budget in :func:`chat.llm.classify.classify`) -> fallback to
+  the host with ``reason="fallback"``.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from chat.llm.mock import MockLLMClient
+from chat.services.addressee import AddresseeDecision, detect_addressee
+
+
+@pytest.mark.asyncio
+async def test_classifier_picks_guest():
+    """Classifier returns the guest id verbatim — caller propagates it."""
+    canned = [
+        json.dumps(
+            {
+                "addressee_id": "bot_b",
+                "confidence": "high",
+                "reason": "user named BotB",
+            }
+        )
+    ]
+    client = MockLLMClient(canned=canned)
+
+    result = await detect_addressee(
+        client,
+        classifier_model="test-model",
+        user_prose="BotB, what do you think?",
+        host_id="bot_a",
+        host_name="BotA",
+        guest_id="bot_b",
+        guest_name="BotB",
+    )
+
+    assert isinstance(result, AddresseeDecision)
+    assert result.addressee_id == "bot_b"
+    assert result.confidence == "high"
+
+
+@pytest.mark.asyncio
+async def test_classifier_picks_host():
+    """Classifier returns the host id — caller propagates it."""
+    canned = [
+        json.dumps(
+            {
+                "addressee_id": "bot_a",
+                "confidence": "medium",
+                "reason": "narration aimed at host",
+            }
+        )
+    ]
+    client = MockLLMClient(canned=canned)
+
+    result = await detect_addressee(
+        client,
+        classifier_model="test-model",
+        user_prose="I lean back and stretch.",
+        host_id="bot_a",
+        host_name="BotA",
+        guest_id="bot_b",
+        guest_name="BotB",
+    )
+
+    assert result.addressee_id == "bot_a"
+    assert result.confidence == "medium"
+
+
+@pytest.mark.asyncio
+async def test_classifier_failure_falls_back_to_host():
+    """Three bad-JSON responses exhaust the retry budget and the
+    classifier-failure fallback returns ``host_id`` with
+    ``reason="fallback"``."""
+    canned = ["not json", "still not json", "garbage"]
+    client = MockLLMClient(canned=canned)
+
+    result = await detect_addressee(
+        client,
+        classifier_model="test-model",
+        user_prose="anything",
+        host_id="bot_a",
+        host_name="BotA",
+        guest_id="bot_b",
+        guest_name="BotB",
+    )
+
+    assert result.addressee_id == "bot_a"
+    assert result.reason == "fallback"
+    assert result.confidence == "low"
diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py
index 7d04755..665dc0c 100644
--- a/tests/test_turn_flow.py
+++ b/tests/test_turn_flow.py
@@ -405,14 +405,15 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
     1 user_turn + 1 assistant_turn + 6 *post-turn* edge_updates + 2
     memory_written events. Single turn_html broadcast.
 
-    Canned queue (8 calls):
+    Canned queue (11 calls):
       1. parse_turn
-      2. narrative stream (primary, addressee = host because the prose
+      2. detect_addressee (T74.1) -> host
+      3. narrative stream (primary, addressee = host because the prose
          doesn't name the guest)
-      3-8. 6 state-update calls (one per directed pair across {you,
+      4-9. 6 state-update calls (one per directed pair across {you,
          bot_a, bot_b})
-      9. detect_interjection -> should_interject=False
-      10. detect_scene_close -> should_close=False
+      10. detect_interjection -> should_interject=False
+      11. detect_scene_close -> should_close=False
     """
     _seed_chat_with_guest(tmp_path / "test.db")
     canned_parse = json.dumps(
@@ -420,6 +421,9 @@ def test_multi_bot_turn_no_interjection(app_state_setup, tmp_path):
     )
     canned = [
         canned_parse,
+        json.dumps(
+            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
+        ),
         "Greetings.",
         _zero_state(), _zero_state(), _zero_state(),
         _zero_state(), _zero_state(), _zero_state(),
@@ -474,14 +478,15 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
     1 user_turn + 2 assistant_turns + (6 + 6) post-turn edge_updates +
     4 memory_written events.
 
-    Canned queue (16 calls):
+    Canned queue (17 calls):
       1. parse_turn
-      2. narrative stream (primary)
-      3-8. 6 state-update calls (post-primary)
-      9. detect_interjection -> should_interject=True
-      10. narrative stream (interjection)
-      11-16. 6 state-update calls (post-interjection)
-      17. detect_scene_close -> should_close=False
+      2. detect_addressee (T74.1) -> host
+      3. narrative stream (primary)
+      4-9. 6 state-update calls (post-primary)
+      10. detect_interjection -> should_interject=True
+      11. narrative stream (interjection)
+      12-17. 6 state-update calls (post-interjection)
+      18. detect_scene_close -> should_close=False
     """
     _seed_chat_with_guest(tmp_path / "test.db")
     canned_parse = json.dumps(
@@ -489,6 +494,9 @@ def test_multi_bot_turn_with_interjection(app_state_setup, tmp_path):
     )
     canned = [
         canned_parse,
+        json.dumps(
+            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
+        ),
         "Primary beat.",
         _zero_state(), _zero_state(), _zero_state(),
         _zero_state(), _zero_state(), _zero_state(),
@@ -555,14 +563,15 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
     rewrites fire for both bots (memory.pov_summary changes for each).
     Interjection short-circuits at False so the queue stays compact.
 
-    Canned queue (12 calls):
+    Canned queue (13 calls):
       1. parse_turn
-      2. narrative stream (primary)
-      3-8. 6 state-update calls
-      9. detect_interjection -> False (no follow-on stream)
-      10. detect_scene_close -> True
-      11. apply_scene_close_summary host POV
-      12. apply_scene_close_summary guest POV
+      2. detect_addressee (T74.1) -> host
+      3. narrative stream (primary)
+      4-9. 6 state-update calls
+      10. detect_interjection -> False (no follow-on stream)
+      11. detect_scene_close -> True
+      12. apply_scene_close_summary host POV
+      13. apply_scene_close_summary guest POV
     """
     _seed_chat_with_guest(tmp_path / "test.db")
     canned_parse = json.dumps(
@@ -588,6 +597,9 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
     )
     canned = [
         canned_parse,
+        json.dumps(
+            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
+        ),
         "Goodnight.",
         _zero_state(), _zero_state(), _zero_state(),
         _zero_state(), _zero_state(), _zero_state(),
@@ -639,12 +651,20 @@ def test_multi_bot_turn_scene_close_writes_per_pov_summaries(
 
 
 def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
-    """Prose that names the guest by name routes the primary turn to the
-    guest. Interjection (when fired) makes the host the silent witness
-    and the second assistant_turn carries the host as speaker.
+    """T74.1: the multi-entity addressee call goes through the classifier;
+    when the classifier returns the guest, the primary turn routes there.
+    Interjection (when fired) makes the host the silent witness and the
+    second assistant_turn carries the host as speaker.
 
-    Canned queue: same shape as the with-interjection test (16 calls)
-    plus the trailing scene_close decision.
+    Canned queue (with classifier-led addressee = guest):
+      1. parse_turn
+      2. detect_addressee -> bot_b (the guest)
+      3. narrative stream (primary, addressee = guest)
+      4-9. 6 state-update calls
+      10. detect_interjection -> True
+      11. interjection narrative stream
+      12-17. 6 state-update calls (post-interjection)
+      18. detect_scene_close -> False
     """
     _seed_chat_with_guest(tmp_path / "test.db")
     canned_parse = json.dumps(
@@ -652,6 +672,13 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
     )
     canned = [
         canned_parse,
+        json.dumps(
+            {
+                "addressee_id": "bot_b",
+                "confidence": "high",
+                "reason": "user named BotB",
+            }
+        ),
         "BotB pondering.",
         _zero_state(), _zero_state(), _zero_state(),
         _zero_state(), _zero_state(), _zero_state(),
@@ -680,8 +707,8 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
         primary_payload = json.loads(rows[0][0])
         interjection_payload = json.loads(rows[1][0])
 
-    # Primary speaker is the guest because the prose names BotB and not
-    # BotA (case-insensitive whole-word match).
+    # Primary speaker is the guest because the addressee classifier
+    # picked bot_b for the prose ("BotB, what do you think?").
     assert primary_payload["speaker_id"] == "bot_b"
     # Interjection follow-on goes to the silent witness — the host.
     assert interjection_payload["speaker_id"] == "bot_a"

From 88fae3315229e5add1e93d28b38fbe71b74a5886 Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Sun, 26 Apr 2026 17:38:30 -0400
Subject: [PATCH 2/4] fix: enqueue significance for interjection memories
 (T74.2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

T44's interjection branch wrote interjection memories via
record_turn_memory_for_present but never enqueued a SignificanceJob,
so the interjection beat could land in memory but never be scored —
which meant it could never auto-pin even when it carried a pivotal
moment.

- Capture the host-POV memory id from the interjection's memory write
  result and enqueue a SignificanceJob mirroring the primary turn's
  pattern. One enqueue per beat (host id; guest POV piggybacks on the
  same score since the prose is identical for v2 — per-POV rewrite
  happens at scene close in T45).
- New test test_interjection_enqueues_significance_job pins the
  contract by intercepting worker.enqueue and asserting two distinct
  jobs land per 3-entity turn that fires an interjection.
---
 chat/web/turns.py       | 29 ++++++++++++++++++-
 tests/test_turn_flow.py | 62 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/chat/web/turns.py b/chat/web/turns.py
index 4309b8e..ab1308c 100644
--- a/chat/web/turns.py
+++ b/chat/web/turns.py
@@ -616,7 +616,7 @@ async def post_turn(
 
                 # Memory write for the interjection beat — a second pair
                 # of memory_written events (host + guest POVs).
-                record_turn_memory_for_present(
+                interject_memory_results = record_turn_memory_for_present(
                     conn,
                     chat_id=chat_id,
                     host_bot_id=host_bot["id"],
@@ -626,6 +626,33 @@ async def post_turn(
                     chat_clock_at=chat.get("time"),
                 )
 
+                # T74.2: enqueue a significance pass for the interjection
+                # memory. Mirrors the primary-turn enqueue pattern above —
+                # we score on the host's memory id since the prose is
+                # identical across both POVs (per-POV rewrite happens at
+                # scene close in T45). Without this enqueue the
+                # interjection beat lands in memory but never gets scored,
+                # so it can never auto-pin even when it carries a pivotal
+                # moment.
+                interject_host_event = interject_memory_results.get(
+                    host_bot["id"]
+                )
+                interject_host_memory_id = (
+                    interject_host_event[1] if interject_host_event else None
+                )
+                if (
+                    worker is not None
+                    and interject_host_memory_id is not None
+                ):
+                    worker.enqueue(
+                        SignificanceJob(
+                            memory_id=interject_host_memory_id,
+                            narrative_text=interjection_text,
+                            prior_dialogue=recent_post_interject,
+                            host_bot_id=host_bot["id"],
+                        )
+                    )
+
     # 9. Scene-close detection (Plan §7.2, T26). Runs AFTER assistant_turn
     # and the optional interjection so the bots' responses are part of
     # the closing scene's final beat — closing before narrative would
diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py
index 665dc0c..281d123 100644
--- a/tests/test_turn_flow.py
+++ b/tests/test_turn_flow.py
@@ -713,3 +713,65 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
     # Interjection follow-on goes to the silent witness — the host.
     assert interjection_payload["speaker_id"] == "bot_a"
     assert interjection_payload["interjection_of"] == "bot_b"
+
+
+def test_interjection_enqueues_significance_job(app_state_setup, tmp_path):
+    """T74.2: when an interjection fires, the interjection memory is
+    enqueued for significance scoring just like the primary memory.
+
+    Capture enqueued ``SignificanceJob``s by replacing the background
+    worker's ``enqueue`` method with a list-append. Without T74.2, the
+    interjection memory would never be scored — only the primary's
+    enqueue would land. We therefore expect TWO jobs after a turn that
+    has both a primary and an interjection beat: one for the primary
+    memory, one for the interjection memory.
+    """
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "tell me"}]}
+    )
+    canned = [
+        canned_parse,
+        json.dumps(
+            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
+        ),
+        "Primary beat.",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_interject": True, "reason": "jealous"}),
+        "Interjection beat!",
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_close": False, "reason": "no signal"}),
+    ]
+    _override_llm(canned)
+
+    captured_jobs: list = []
+    worker = app.state.background_worker
+    # Re-enable enqueue capture even though the worker's loop is disabled
+    # — we want to count enqueues without the loop running classifier work.
+    worker.enabled = True
+    original_enqueue = worker.enqueue
+    worker.enqueue = captured_jobs.append  # type: ignore[assignment]
+
+    try:
+        response = app_state_setup.post(
+            "/chats/chat_bot_a/turns", data={"prose": "tell me"}
+        )
+        assert response.status_code == 204
+    finally:
+        worker.enqueue = original_enqueue  # type: ignore[assignment]
+        worker.enabled = False
+        app.dependency_overrides.clear()
+
+    # Expect 2 enqueues: 1 for the primary memory + 1 for the
+    # interjection memory.
+    assert len(captured_jobs) == 2
+
+    # Both jobs should reference distinct memory ids — the primary's
+    # host-POV memory and the interjection's host-POV memory.
+    memory_ids = [job.memory_id for job in captured_jobs]
+    assert len(set(memory_ids)) == 2
+    # The two narrative texts should be the two streamed beats.
+    narrative_texts = sorted(job.narrative_text for job in captured_jobs)
+    assert narrative_texts == ["Interjection beat!", "Primary beat."]

From bfb2ffb6f6ca21b091620256e9f675495e7939b0 Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Sun, 26 Apr 2026 17:40:12 -0400
Subject: [PATCH 3/4] chore: pin scene-close-on-cancel behavior + comment
 rationale (T74.3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 T44 review noted that scene close still runs when a primary
turn is cancelled mid-stream and asked the implementer to review.

Review finding: the existing behavior is correct, not a bug. The
close-detection branch in post_turn consumes ONLY the user's prose
(fully appended to the event_log BEFORE streaming starts) and the
current container name. It does NOT consume the bot's output. A user
who types "we're done here, fade out" and then hits Stop mid-stream
still meant to close — the cancelled bot beat doesn't invalidate
that intent.

- Document the rationale with an inline comment near the
  close-detection branch in chat/web/turns.py.
- Add regression test
  test_cancelled_turn_still_closes_scene_when_user_prose_signals_close
  that drives a stream raising CancelledError on first iteration and
  asserts the scene_closed event still lands.
---
 chat/web/turns.py       |   9 +++
 tests/test_turn_flow.py | 121 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/chat/web/turns.py b/chat/web/turns.py
index ab1308c..48882f0 100644
--- a/chat/web/turns.py
+++ b/chat/web/turns.py
@@ -668,6 +668,15 @@ async def post_turn(
     # close in the same chat) — we have nothing to close. T13 (kickoff)
     # is the only scene-opener path in v1; Phase 2-3 will handle
     # automatic re-opening with the next container.
+    #
+    # T74.3: this branch deliberately runs even when ``cancelled`` is
+    # True. Close detection consumes only the user's prose (which is
+    # fully appended to the event_log BEFORE streaming starts) and the
+    # current container name; it does NOT consume the bot's output.
+    # A user who types "we're done here, fade out" and then hits Stop
+    # mid-stream still meant to close the scene — the cancelled bot
+    # beat doesn't invalidate that intent. Pinned by
+    # test_cancelled_turn_still_closes_scene_when_user_prose_signals_close.
     if scene is not None and prose.strip():
         container = None
         if scene.get("container_id") is not None:
diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py
index 281d123..a30ec24 100644
--- a/tests/test_turn_flow.py
+++ b/tests/test_turn_flow.py
@@ -715,6 +715,127 @@ def test_addressee_detection_routes_to_named_bot(app_state_setup, tmp_path):
     assert interjection_payload["interjection_of"] == "bot_b"
 
 
+def test_cancelled_turn_still_closes_scene_when_user_prose_signals_close(
+    app_state_setup, tmp_path
+):
+    """T74.3 regression: a cancelled primary stream still triggers scene
+    close when the user prose carries a hard close signal.
+
+    Rationale (also documented in turns.py near the close-detection
+    branch): close detection only consumes the user's prose, which is
+    fully appended to the event_log BEFORE streaming starts. The
+    cancelled bot beat doesn't invalidate the user's intent to close.
+
+    Implementation: install a MockLLMClient whose ``stream`` raises
+    CancelledError on the first iteration. The classifier calls (parse,
+    addressee, scene_close, per-POV summaries) are still served from
+    the canned queue. The post_turn route ultimately re-raises
+    CancelledError after recording the partial — TestClient surfaces
+    that as an exception, so we drive the request inside ``with
+    pytest.raises``. Despite the exception, the scene_closed event
+    must land in the event_log.
+    """
+    from typing import AsyncIterator, Sequence
+
+    _seed_chat_with_guest(tmp_path / "test.db")
+    canned_parse = json.dumps(
+        {"segments": [{"kind": "narration", "text": "we are done here, fade out"}]}
+    )
+    pov_payload = json.dumps(
+        {
+            "summary": "BotA noticed the day winding down.",
+            "knowledge_facts": [],
+            "relationship_summary": "warmer",
+        }
+    )
+    pov_payload_guest = json.dumps(
+        {
+            "summary": "BotB watched the scene close.",
+            "knowledge_facts": [],
+            "relationship_summary": "warmer",
+        }
+    )
+    # Canned queue: parse + addressee + 6 state-updates +
+    # scene_close=True + 2 per-POV summaries. NO interjection slot
+    # because the cancel path short-circuits the interjection branch.
+    canned = [
+        canned_parse,
+        json.dumps(
+            {"addressee_id": "bot_a", "confidence": "medium", "reason": "host"}
+        ),
+        # NOTE: no narrative slot — the stream is hijacked below to
+        # raise CancelledError on first iteration; it never pulls a
+        # canned response.
+        _zero_state(), _zero_state(), _zero_state(),
+        _zero_state(), _zero_state(), _zero_state(),
+        json.dumps({"should_close": True, "reason": "fade out signaled"}),
+        pov_payload,
+        pov_payload_guest,
+    ]
+
+    class _CancelOnStreamMock:
+        """Mock LLM client that serves ``generate`` from a canned queue
+        and raises CancelledError on the FIRST iteration of ``stream``.
+
+        Mirrors :class:`chat.llm.mock.MockLLMClient` for ``generate`` but
+        diverges on ``stream`` to simulate a mid-stream cancel.
+        """
+
+        def __init__(self, canned: list[str]) -> None:
+            self._canned = list(canned)
+
+        async def generate(
+            self, messages: Sequence, *, model: str, **params
+        ) -> str:
+            return self._canned.pop(0)
+
+        async def stream(
+            self, messages: Sequence, *, model: str, **params
+        ) -> AsyncIterator[str]:
+            # Yield a CancelledError on first iteration to simulate the
+            # /turns/cancel route firing mid-stream.
+            raise asyncio.CancelledError
+            yield  # pragma: no cover — keeps this an async generator.
+
+    from chat.web.kickoff import get_llm_client
+
+    mock = _CancelOnStreamMock(canned=list(canned))
+    app.dependency_overrides[get_llm_client] = lambda: mock
+
+    try:
+        # FastAPI/Starlette handles the re-raised CancelledError as an
+        # internal failure — TestClient surfaces it as a 500 response.
+        # We don't assert on the status here; the regression is whether
+        # the scene_closed event still landed in the event_log.
+        try:
+            app_state_setup.post(
+                "/chats/chat_bot_a/turns",
+                data={"prose": "we are done here, fade out"},
+            )
+        except BaseException:
+            # Some Starlette/asyncio versions propagate the
+            # CancelledError out of the test client; that's fine — the
+            # partial-record + scene-close still ran before the raise.
+            pass
+    finally:
+        app.dependency_overrides.clear()
+
+    with open_db(tmp_path / "test.db") as conn:
+        scene_close_count = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
+        ).fetchone()[0]
+        assistant_payload = conn.execute(
+            "SELECT payload_json FROM event_log "
+            "WHERE kind = 'assistant_turn' ORDER BY id"
+        ).fetchall()
+
+    # Scene close lands despite the cancel.
+    assert scene_close_count == 1
+    # The cancelled assistant_turn was still recorded (truncated=True).
+    assert len(assistant_payload) == 1
+    assert json.loads(assistant_payload[0][0])["truncated"] is True
+
+
 def test_interjection_enqueues_significance_job(app_state_setup, tmp_path):
     """T74.2: when an interjection fires, the interjection memory is
     enqueued for significance scoring just like the primary memory.

From 6d98728a2eb1146d3fb907fb188584a1d2fabefc Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Sun, 26 Apr 2026 17:40:46 -0400
Subject: [PATCH 4/4] chore: remove defensive stale-guest degrade in turns.py
 (T74.4)

T44 carried a defensive degrade-to-1:1 block in post_turn for the
case where chat.guest_bot_id pointed at a deleted bot. T47 then
fixed the root cause by adding a bot_reset cascade that clears
guest_bot_id from any chat that referenced the deleted bot, so the
post_turn defensive block was rendered dead.

Remove the orphan-clear branch and replace it with a comment
documenting that get_bot now returns a real row when guest_bot_id
is non-None. The cascade behavior is pinned by
test_reset_clears_guest_reference_in_other_chats in tests/test_reset.py.
---
 chat/web/turns.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/chat/web/turns.py b/chat/web/turns.py
index 48882f0..b3a4f0e 100644
--- a/chat/web/turns.py
+++ b/chat/web/turns.py
@@ -236,11 +236,12 @@ async def post_turn(
     guest_bot = None
     guest_bot_id = chat.get("guest_bot_id")
     if guest_bot_id is not None:
+        # T47's bot_reset cascade clears guest_bot_id from any chat that
+        # referenced the deleted bot, so by the time we read it here it's
+        # either None or a live bot id. The previous defensive
+        # degrade-to-1:1 block (T44) was rendered dead by T47 and removed
+        # in T74.4 — get_bot now returns a real row.
         guest_bot = get_bot(conn, guest_bot_id)
-        # If the chat references a deleted guest we degrade to single-bot
-        # rather than 404 — the chat is still usable as a 1:1.
-        if guest_bot is None:
-            guest_bot_id = None
 
     settings = request.app.state.settings