feat: scene close on hard signals with manual override

2026-04-26 13:46:14 -04:00
parent db3005fc17
commit 0997562e75
7 changed files with 482 additions and 1 deletions
@@ -0,0 +1,100 @@
+"""Scene-close hard-signal detection (T26).
+
+A small classifier service that decides whether the user's prose narrates
+a hard signal that should close the active scene. Hard signals (per
+Requirements §7.2):
+
+* Container change parsed from prose ("we drove to the park", "we stepped
+  outside").
+* Explicit user pattern signaling end ("we're done here", "fade out",
+  "scene end").
+
+NOT close signals:
+
+* Brief activity changes within the same container ("I sit down").
+* Future plans ("let's go to the park later").
+
+The service returns a :class:`SceneCloseDecision`. The default on classifier
+failure is ``should_close=False`` so the turn flow keeps moving — closing
+on a misfire would be more disruptive than missing a real signal, and the
+manual button in the drawer is always available as a fallback.
+
+Phase 2/3 will introduce automatic re-opening with the new container; for
+T26 the close is one-way and the next user turn operates without an active
+scene (the prompt assembler already tolerates this).
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+from chat.llm.classify import classify
+from chat.llm.client import LLMClient
+
+
+class SceneCloseDecision(BaseModel):
+    """Structured verdict returned by the scene-close classifier.
+
+    Every field has a default, so a bare ``SceneCloseDecision()`` is the
+    "keep the scene open" answer.
+
+    ``new_container_hint`` is captured opportunistically when the close
+    signal is a container change, but T26 doesn't act on it — Phase 2/3
+    handles automatic re-opening at the new location.
+    """
+
+    # True when the prose carries a hard close signal; defaults to staying open.
+    should_close: bool = False
+    # Short free-text justification; ``detect_scene_close`` uses "fallback"
+    # for the default decision it hands to the classifier.
+    reason: str = ""
+    # Optional name of the destination when the signal is a container change.
+    # NOTE(review): typed as plain ``str`` — presumably a JSON ``null`` here
+    # would fail validation; confirm how classify() treats such a reply.
+    new_container_hint: str = ""
+
+
+# System prompt for the scene-close classifier. It lists the two hard close
+# signals and the two explicit non-signals, then pins the reply to a JSON
+# object whose keys match the fields of ``SceneCloseDecision``.
+_SYSTEM = (
+    "You decide whether a roleplay scene should close based on the user's "
+    "prose.\n"
+    "Close signals (return should_close=true):\n"
+    "- The prose narrates a CONTAINER CHANGE (moving to a different place, "
+    'e.g. "we drove to the park", "we stepped outside").\n'
+    "- The prose has an EXPLICIT USER PATTERN signaling end "
+    '("we\'re done here", "fade out", "scene end").\n'
+    "\n"
+    "DO NOT close on:\n"
+    "- Brief activity changes within the same place "
+    '(e.g. "I sit down" — same room).\n'
+    "- Future plans "
+    '("let\'s go to the park later" — not yet).\n'
+    "\n"
+    'Reply JSON: {"should_close": bool, "reason": str (short), '
+    '"new_container_hint": str (optional name)}.'
+)
+
+
+async def detect_scene_close(
+    client: LLMClient,
+    *,
+    model: str,
+    prose: str,
+    current_container_name: str,
+    timeout_s: float = 10.0,
+) -> SceneCloseDecision:
+    """Classify one user turn as scene-closing or not.
+
+    Args:
+        client: LLM client handed straight to ``classify``.
+        model: Model identifier forwarded to ``classify``.
+        prose: The user's prose for this turn.
+        current_container_name: Name of the active scene's container; it is
+            put in the prompt so "a different place" is judged against it
+            rather than guessed.
+        timeout_s: Timeout forwarded to ``classify``.
+
+    Returns:
+        The classifier's ``SceneCloseDecision``. ``classify`` receives a
+        ``should_close=False`` decision (``reason="fallback"``) as its
+        ``default``, the safe answer on classifier failure.
+    """
+    # Handed to classify() as the default: never close on a misfire.
+    keep_open = SceneCloseDecision(
+        should_close=False, reason="fallback", new_container_hint=""
+    )
+    user_message = "\n".join(
+        (
+            f"CURRENT CONTAINER: {current_container_name}",
+            "",
+            "PROSE:",
+            f"{prose}",
+            "",
+            "Decide whether to close the scene.",
+        )
+    )
+    return await classify(
+        client,
+        model=model,
+        system=_SYSTEM,
+        user=user_message,
+        schema=SceneCloseDecision,
+        default=keep_open,
+        timeout_s=timeout_s,
+    )
@@ -16,6 +16,15 @@
      <p class="muted">No active container.</p>
    {% endif %}
    <p>Time: {{ chat.time }}</p>
+    {# Manual scene close: the button POSTs to the drawer close route and
+       the response replaces the contents of #drawer. #}
+    {% if scene %}
+      <form class="inline-edit"
+            hx-post="/chats/{{ chat.id }}/drawer/scene/close"
+            hx-target="#drawer" hx-swap="innerHTML">
+        <button type="submit">Close scene</button>
+      </form>
+    {% else %}
+      <p class="muted">No active scene.</p>
+    {% endif %}
  </section>

  <section class="drawer-section">
@@ -135,6 +135,46 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)):
 #      HTMX swaps into ``#drawer``.


+@router.post(
+    "/chats/{chat_id}/drawer/scene/close",
+    response_class=HTMLResponse,
+)
+async def close_scene_manual(
+    chat_id: str,
+    request: Request,
+    conn=Depends(get_conn),
+):
+    """Close the chat's active scene on demand (drawer "Close scene" button).
+
+    Emits a ``scene_closed`` event like the auto-close path in the turn flow
+    does, but without consulting the classifier, then re-renders the drawer
+    so HTMX can swap the fresh partial in.
+
+    Raises:
+        HTTPException: ``404`` if the chat does not exist; ``400`` if the
+            chat has no active scene (the button is hidden in that state,
+            but a stale tab might still POST).
+    """
+    chat_row = get_chat(conn, chat_id)
+    if chat_row is None:
+        raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}")
+
+    open_scene = active_scene(conn, chat_id)
+    if open_scene is None:
+        raise HTTPException(
+            status_code=400, detail="no active scene to close"
+        )
+
+    close_payload = {
+        "scene_id": open_scene["id"],
+        "ended_at": chat_row.get("time"),
+        # T27 will set this from the per-POV summary pass; for T26 we
+        # default to 0 so the projector update has a value to write.
+        "significance": 0,
+    }
+    append_and_apply(conn, kind="scene_closed", payload=close_payload)
+    return await drawer(chat_id, request, conn)
+
+
@router.post(
    "/chats/{chat_id}/drawer/edge/{source_id}/{target_id}/affinity",
    response_class=HTMLResponse,
@@ -42,11 +42,12 @@ from chat.eventlog.log import append_and_apply, append_event
 from chat.services.background import SignificanceJob
 from chat.services.memory_write import record_turn_memory
 from chat.services.prompt import assemble_narrative_prompt
+from chat.services.scene_close import detect_scene_close
 from chat.services.state_update import compute_state_update
 from chat.services.turn_parse import ParsedTurn, parse_turn
 from chat.state.edges import get_edge
 from chat.state.entities import get_bot, get_you
-from chat.state.world import active_scene, get_chat
+from chat.state.world import active_scene, get_chat, get_container
 from chat.web.bots import get_conn
 from chat.web.kickoff import get_llm_client
 from chat.web.pubsub import publish
@@ -331,6 +332,44 @@ async def post_turn(
            )
        )

+    # 6d. Scene-close detection (Plan §7.2, T26). Runs AFTER assistant_turn
+    # so the bot's response is the closing scene's final beat — closing
+    # before narrative would force the bot to speak "in no scene", which
+    # is awkward. Hard signals only in Phase 1: container change parsed
+    # from prose, or explicit "fade out" / "we're done here" patterns.
+    # On classifier failure the service returns ``should_close=False``
+    # so the turn flow keeps moving; the manual close button in the
+    # drawer is the always-available fallback.
+    #
+    # Skip empty prose — no signal to classify and no point spending a
+    # round-trip. Skip when there's no active scene (e.g. after a prior
+    # close in the same chat) — we have nothing to close. T13 (kickoff)
+    # is the only scene-opener path in v1; Phase 2-3 will handle
+    # automatic re-opening with the next container.
+    if scene is not None and prose.strip():
+        container = None
+        if scene.get("container_id") is not None:
+            container = get_container(conn, scene["container_id"])
+        container_name = container["name"] if container else "unknown"
+        decision = await detect_scene_close(
+            client,
+            model=settings.classifier_model,
+            prose=prose,
+            current_container_name=container_name,
+        )
+        if decision.should_close:
+            append_and_apply(
+                conn,
+                kind="scene_closed",
+                payload={
+                    "scene_id": scene["id"],
+                    "ended_at": chat.get("time"),
+                    # T27 will set significance from the per-POV summary
+                    # pass; T26 just emits the close event with a default.
+                    "significance": 0,
+                },
+            )
+
    # 7. Broadcast a JSON completion event (for JS consumers) and an HTML
    # fragment event (for HTMX SSE swap-into-timeline).
    await publish(
@@ -156,6 +156,12 @@ def client(tmp_path, monkeypatch):
    canned_state_update = json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )
+    # T26 scene-close detection runs after the state-update pass. ``_seed_full``
+    # below doesn't open a scene so the classifier call is short-circuited in
+    # turns.py — but the canned slot stays in place to document the order.
+    canned_scene_close = json.dumps(
+        {"should_close": False, "reason": "no signal"}
+    )

    from chat.web.kickoff import get_llm_client

@@ -165,6 +171,7 @@ def client(tmp_path, monkeypatch):
            canned_response,
            canned_state_update,
            canned_state_update,
+            canned_scene_close,
        ]
    )
    app.dependency_overrides[get_llm_client] = lambda: mock
@@ -0,0 +1,278 @@
+"""Scene close on hard signals + manual override (T26).
+
+A small classifier service decides whether the user's prose narrates a
+"hard signal" that should close the active scene (container change,
+explicit "fade out" / "we're done here" patterns). Wired into the turn
+flow AFTER the assistant_turn so the bot's response is the final beat in
+the closing scene. The drawer also exposes a manual "Close scene" button
+that always fires a ``scene_closed`` event.
+
+Per Task 26 we DO NOT auto-open a new scene on close — the next
+interaction either lives in a fresh chat or operates without an active
+scene; the prompt assembler already tolerates ``active_scene == None``.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+from chat.app import app
+from chat.db.connection import open_db
+from chat.eventlog.log import append_event
+from chat.eventlog.projector import project
+from chat.llm.mock import MockLLMClient
+from chat.services.scene_close import detect_scene_close
+
+
+# ---------------------------------------------------------------------------
+# Service-level tests (no FastAPI involvement).
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_detect_scene_close_returns_decision():
+    """A well-formed classifier reply comes back as a populated decision."""
+    canned = json.dumps(
+        {
+            "should_close": True,
+            "reason": "container change",
+            "new_container_hint": "park",
+        }
+    )
+    mock = MockLLMClient(canned=[canned])
+    decision = await detect_scene_close(
+        mock,
+        model="x",
+        prose="we drove to the park",
+        current_container_name="office",
+    )
+    assert decision.should_close is True
+    assert "container" in decision.reason
+    # The hint is part of the canned reply; pin it so every field of the
+    # decision is covered, not just the verdict and the reason.
+    assert decision.new_container_hint == "park"
+
+
+@pytest.mark.asyncio
+async def test_detect_scene_close_default_on_failure():
+    """Unparseable classifier output must not close the scene.
+
+    Both canned replies are non-JSON, which trips the classifier's
+    retry-then-default path; the result should be the safe
+    ``should_close=False`` fallback rather than a crash in the turn flow.
+    """
+    garbage_replies = ["nope", "still nope"]
+    llm = MockLLMClient(canned=garbage_replies)
+    verdict = await detect_scene_close(
+        llm,
+        model="x",
+        prose="anything",
+        current_container_name="office",
+    )
+    assert verdict.should_close is False
+
+
+# ---------------------------------------------------------------------------
+# HTTP integration: turn flow + manual close.
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):
+    """Yield a TestClient wired to a temp config/DB and a canned mock LLM.
+
+    The mock is loaded with the replies one ``POST /turns`` consumes, ending
+    with a scene-close verdict of ``should_close=True``. The dependency
+    override is removed in a ``finally`` so that a failure while starting or
+    stopping the app cannot leak the mock into later tests.
+    """
+    cfg = tmp_path / "config.toml"
+    cfg.write_text('featherless_api_key = "test"\n')
+    monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
+    db = tmp_path / "test.db"
+    monkeypatch.setenv("CHAT_DB_PATH", str(db))
+
+    # Order of canned responses for one POST /turns:
+    #   1. parse_turn classifier
+    #   2. narrative streamer
+    #   3. state_update bot->you
+    #   4. state_update you->bot
+    #   5. detect_scene_close (runs AFTER assistant_turn — see turns.py)
+    parse_canned = json.dumps(
+        {"segments": [{"kind": "dialogue", "text": "hello"}]}
+    )
+    narrative_canned = "BotA grins."
+    state_update_canned = json.dumps(
+        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
+    )
+    scene_close_canned = json.dumps(
+        {
+            "should_close": True,
+            "reason": "container change",
+            "new_container_hint": "park",
+        }
+    )
+
+    # Imported here, after the env vars above have been set.
+    from chat.web.kickoff import get_llm_client
+
+    mock = MockLLMClient(
+        canned=[
+            parse_canned,
+            narrative_canned,
+            state_update_canned,
+            state_update_canned,
+            scene_close_canned,
+        ]
+    )
+    app.dependency_overrides[get_llm_client] = lambda: mock
+
+    try:
+        with TestClient(app) as c:
+            # Same as other turn-flow tests: keep the async significance
+            # worker off so it doesn't try to call Featherless with the
+            # test API key.
+            app.state.background_worker.enabled = False
+            yield c
+    finally:
+        # Runs even if app startup/shutdown raises, so the override never
+        # outlives this fixture.
+        app.dependency_overrides.clear()
+
+
+def _seed(db_path: Path, *, with_scene: bool = True) -> None:
+    """Write the events a full turn flow needs, then run the projector.
+
+    Appends, in order: the bot, its chat, one container ("office"), an
+    activity for "you" and for the bot, and one edge in each direction.
+    When ``with_scene`` is true a ``scene_opened`` event is appended as
+    well; pass ``with_scene=False`` to leave the chat without an active
+    scene.
+    """
+    chat_id = "chat_bot_a"
+    bot_id = "bot_a"
+    start_time = "2026-04-26T20:00:00+00:00"
+    # (entity_id, posture, verb) — "you" first, then the bot.
+    activities = (
+        ("you", "sitting", "thinking"),
+        (bot_id, "standing", "watching"),
+    )
+    # (source_id, target_id, knowledge_facts) — bot->you first, then you->bot.
+    edges = (
+        (bot_id, "you", ["coworker"]),
+        ("you", bot_id, []),
+    )
+
+    with open_db(db_path) as conn:
+        append_event(
+            conn,
+            kind="bot_authored",
+            payload={
+                "id": bot_id,
+                "name": "BotA",
+                "persona": "thoughtful, observant",
+                "voice_samples": [],
+                "traits": [],
+                "backstory": "",
+                "initial_relationship_to_you": "",
+                "kickoff_prose": "",
+            },
+        )
+        append_event(
+            conn,
+            kind="chat_created",
+            payload={
+                "id": chat_id,
+                "host_bot_id": bot_id,
+                "initial_time": start_time,
+                "narrative_anchor": "Day 1",
+                "weather": "",
+            },
+        )
+        append_event(
+            conn,
+            kind="container_created",
+            payload={
+                "chat_id": chat_id,
+                "name": "office",
+                "type": "workplace",
+                "properties": {},
+            },
+        )
+        for entity_id, posture, verb in activities:
+            append_event(
+                conn,
+                kind="activity_change",
+                payload={
+                    "entity_id": entity_id,
+                    "posture": posture,
+                    "action": {
+                        "verb": verb,
+                        "interruptible": True,
+                        "required_attention": "low",
+                        "expected_duration": "ongoing",
+                    },
+                    "attention": "",
+                    "holding": [],
+                    "status": {},
+                },
+            )
+        for source_id, target_id, facts in edges:
+            append_event(
+                conn,
+                kind="edge_update",
+                payload={
+                    "source_id": source_id,
+                    "target_id": target_id,
+                    "chat_id": chat_id,
+                    "knowledge_facts": facts,
+                },
+            )
+        if with_scene:
+            append_event(
+                conn,
+                kind="scene_opened",
+                payload={
+                    "chat_id": chat_id,
+                    # Presumably the id given to the container created
+                    # above — confirm against the projector.
+                    "container_id": 1,
+                    "started_at": start_time,
+                    "participants": ["you", bot_id],
+                },
+            )
+        project(conn)
+
+
+def test_post_turn_closes_scene_on_container_change(client, tmp_path):
+    """A turn whose classifier verdict is "close" ends the active scene."""
+    from chat.state.world import active_scene
+
+    db_path = tmp_path / "test.db"
+    _seed(db_path)
+    resp = client.post(
+        "/chats/chat_bot_a/turns", data={"prose": "we drove to the park"}
+    )
+    assert resp.status_code == 204
+
+    with open_db(db_path) as conn:
+        # Exactly one scene_closed event was logged.
+        closed_count = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
+        ).fetchone()[0]
+        assert closed_count == 1
+        # The projector cleared the active scene.
+        assert active_scene(conn, "chat_bot_a") is None
+        # assistant_turn must land BEFORE scene_closed: the bot's reply is
+        # the closing scene's final beat.
+        rows = conn.execute(
+            "SELECT kind FROM event_log "
+            "WHERE kind IN ('assistant_turn', 'scene_closed') ORDER BY id"
+        ).fetchall()
+        assert [row[0] for row in rows] == ["assistant_turn", "scene_closed"]
+
+
+def test_manual_close_scene_button(client, tmp_path):
+    """POSTing the drawer close route closes the active scene."""
+    from chat.state.world import active_scene
+
+    db_path = tmp_path / "test.db"
+    _seed(db_path)
+    resp = client.post("/chats/chat_bot_a/drawer/scene/close")
+    assert resp.status_code == 200
+
+    with open_db(db_path) as conn:
+        closed_count = conn.execute(
+            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
+        ).fetchone()[0]
+        assert closed_count == 1
+        assert active_scene(conn, "chat_bot_a") is None
+
+
+def test_manual_close_400_when_no_active_scene(client, tmp_path):
+    """Without an active scene the manual close route answers 400."""
+    db_path = tmp_path / "test.db"
+    _seed(db_path, with_scene=False)
+    resp = client.post("/chats/chat_bot_a/drawer/scene/close")
+    assert resp.status_code == 400
@@ -43,6 +43,13 @@ def client(tmp_path, monkeypatch):
    canned_state_update = json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )
+    # T26 scene-close detection runs after the state-update pass. These
+    # tests don't seed an active scene so the classifier is short-circuited
+    # in turns.py — but the canned slot is harmless to leave in place,
+    # and adding it documents the order even when the call doesn't fire.
+    canned_scene_close = json.dumps(
+        {"should_close": False, "reason": "no signal"}
+    )

    # Import here so env vars are visible to the dependency lookup.
    from chat.web.kickoff import get_llm_client
@@ -53,6 +60,7 @@ def client(tmp_path, monkeypatch):
            canned_response,
            canned_state_update,
            canned_state_update,
+            canned_scene_close,
        ]
    )
    app.dependency_overrides[get_llm_client] = lambda: mock