From 0997562e7520ab5dd423c3b3f2fc175ca3e5a715 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 13:46:14 -0400 Subject: [PATCH] feat: scene close on hard signals with manual override --- chat/services/scene_close.py | 100 +++++++++++++ chat/templates/_drawer.html | 9 ++ chat/web/drawer.py | 40 +++++ chat/web/turns.py | 41 +++++- tests/test_memory_write.py | 7 + tests/test_scene_close.py | 278 +++++++++++++++++++++++++++++++++++ tests/test_turn_flow.py | 8 + 7 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 chat/services/scene_close.py create mode 100644 tests/test_scene_close.py diff --git a/chat/services/scene_close.py b/chat/services/scene_close.py new file mode 100644 index 0000000..bfa049b --- /dev/null +++ b/chat/services/scene_close.py @@ -0,0 +1,100 @@ +"""Scene-close hard-signal detection (T26). + +A small classifier service that decides whether the user's prose narrates +a hard signal that should close the active scene. Hard signals (per +Requirements §7.2): + +* Container change parsed from prose ("we drove to the park", "we stepped + outside"). +* Explicit user pattern signaling end ("we're done here", "fade out", + "scene end"). + +NOT close signals: + +* Brief activity changes within the same container ("I sit down"). +* Future plans ("let's go to the park later"). + +The service returns a :class:`SceneCloseDecision`. The default on classifier +failure is ``should_close=False`` so the turn flow keeps moving — closing +on a misfire would be more disruptive than missing a real signal, and the +manual button in the drawer is always available as a fallback. + +Phase 2/3 will introduce automatic re-opening with the new container; for +T26 the close is one-way and the next user turn operates without an active +scene (the prompt assembler already tolerates this). +""" + +from __future__ import annotations + +from pydantic import BaseModel + +from chat.llm.classify import classify +from chat.llm.client import LLMClient + + +class SceneCloseDecision(BaseModel): + """Classifier verdict for scene-close detection. + + ``new_container_hint`` is captured opportunistically when the close + signal is a container change, but T26 doesn't act on it — Phase 2/3 + handles automatic re-opening at the new location. + """ + + should_close: bool = False + reason: str = "" + new_container_hint: str = "" + + +_SYSTEM = ( + "You decide whether a roleplay scene should close based on the user's " + "prose.\n" + "Close signals (return should_close=true):\n" + "- The prose narrates a CONTAINER CHANGE (moving to a different place, " + 'e.g. "we drove to the park", "we stepped outside").\n' + "- The prose has an EXPLICIT USER PATTERN signaling end " + '("we\'re done here", "fade out", "scene end").\n' + "\n" + "DO NOT close on:\n" + "- Brief activity changes within the same place " + '(e.g. "I sit down" — same room).\n' + "- Future plans " + '("let\'s go to the park later" — not yet).\n' + "\n" + 'Reply JSON: {"should_close": bool, "reason": str (short), ' + '"new_container_hint": str (optional name)}.' +) + + +async def detect_scene_close( + client: LLMClient, + *, + model: str, + prose: str, + current_container_name: str, + timeout_s: float = 10.0, +) -> SceneCloseDecision: + """Run the scene-close classifier on a single user turn. + + The current container name is passed in so the prompt can reason about + "different place" relative to the active scene rather than guessing. + On classifier failure (parse error twice), the returned decision is the + safe ``should_close=False`` default. + """ + user = ( + f"CURRENT CONTAINER: {current_container_name}\n" + f"\n" + f"PROSE:\n{prose}\n" + f"\n" + f"Decide whether to close the scene." + ) + return await classify( + client, + model=model, + system=_SYSTEM, + user=user, + schema=SceneCloseDecision, + default=SceneCloseDecision( + should_close=False, reason="fallback", new_container_hint="" + ), + timeout_s=timeout_s, + ) diff --git a/chat/templates/_drawer.html b/chat/templates/_drawer.html index 3ed1940..4ebfc22 100644 --- a/chat/templates/_drawer.html +++ b/chat/templates/_drawer.html @@ -16,6 +16,15 @@

No active container.

{% endif %}

Time: {{ chat.time }}

+ {% if scene %} +
+ +
+ {% else %} +

No active scene.

+ {% endif %}
diff --git a/chat/web/drawer.py b/chat/web/drawer.py index 580c956..1d4e0e2 100644 --- a/chat/web/drawer.py +++ b/chat/web/drawer.py @@ -135,6 +135,46 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)): # HTMX swaps into ``#drawer``. +@router.post( + "/chats/{chat_id}/drawer/scene/close", + response_class=HTMLResponse, +) +async def close_scene_manual( + chat_id: str, + request: Request, + conn=Depends(get_conn), +): + """Manual scene close from the drawer button. + + Always available when there's an active scene; mirrors the auto-close + path in the turn flow but bypasses the classifier. Returns the refreshed + drawer partial so HTMX swaps it in. ``400`` when no scene is active — + the button is hidden in that state but a stale tab might still POST. + """ + chat = get_chat(conn, chat_id) + if chat is None: + raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}") + + scene = active_scene(conn, chat_id) + if scene is None: + raise HTTPException( + status_code=400, detail="no active scene to close" + ) + + append_and_apply( + conn, + kind="scene_closed", + payload={ + "scene_id": scene["id"], + "ended_at": chat.get("time"), + # T27 will set this from the per-POV summary pass; for T26 we + # default to 0 so the projector update has a value to write. + "significance": 0, + }, + ) + return await drawer(chat_id, request, conn) + + @router.post( "/chats/{chat_id}/drawer/edge/{source_id}/{target_id}/affinity", response_class=HTMLResponse, diff --git a/chat/web/turns.py b/chat/web/turns.py index b5cca0b..61a1e05 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -42,11 +42,12 @@ from chat.eventlog.log import append_and_apply, append_event from chat.services.background import SignificanceJob from chat.services.memory_write import record_turn_memory from chat.services.prompt import assemble_narrative_prompt +from chat.services.scene_close import detect_scene_close from chat.services.state_update import compute_state_update from chat.services.turn_parse import ParsedTurn, parse_turn from chat.state.edges import get_edge from chat.state.entities import get_bot, get_you -from chat.state.world import active_scene, get_chat +from chat.state.world import active_scene, get_chat, get_container from chat.web.bots import get_conn from chat.web.kickoff import get_llm_client from chat.web.pubsub import publish @@ -331,6 +332,44 @@ async def post_turn( ) ) + # 6d. Scene-close detection (Plan §7.2, T26). Runs AFTER assistant_turn + # so the bot's response is the closing scene's final beat — closing + # before narrative would force the bot to speak "in no scene", which + # is awkward. Hard signals only in Phase 1: container change parsed + # from prose, or explicit "fade out" / "we're done here" patterns. + # On classifier failure the service returns ``should_close=False`` + # so the turn flow keeps moving; the manual close button in the + # drawer is the always-available fallback. + # + # Skip empty prose — no signal to classify and no point spending a + # round-trip. Skip when there's no active scene (e.g. after a prior + # close in the same chat) — we have nothing to close. T13 (kickoff) + # is the only scene-opener path in v1; Phase 2-3 will handle + # automatic re-opening with the next container. + if scene is not None and prose.strip(): + container = None + if scene.get("container_id") is not None: + container = get_container(conn, scene["container_id"]) + container_name = container["name"] if container else "unknown" + decision = await detect_scene_close( + client, + model=settings.classifier_model, + prose=prose, + current_container_name=container_name, + ) + if decision.should_close: + append_and_apply( + conn, + kind="scene_closed", + payload={ + "scene_id": scene["id"], + "ended_at": chat.get("time"), + # T27 will set significance from the per-POV summary + # pass; T26 just emits the close event with a default. + "significance": 0, + }, + ) + # 7. Broadcast a JSON completion event (for JS consumers) and an HTML # fragment event (for HTMX SSE swap-into-timeline). await publish( diff --git a/tests/test_memory_write.py b/tests/test_memory_write.py index feaf6f5..aa87610 100644 --- a/tests/test_memory_write.py +++ b/tests/test_memory_write.py @@ -156,6 +156,12 @@ def client(tmp_path, monkeypatch): canned_state_update = json.dumps( {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} ) + # T26 scene-close detection runs after the state-update pass. ``_seed_full`` + # below doesn't open a scene so the classifier call is short-circuited in + # turns.py — but the canned slot stays in place to document the order. + canned_scene_close = json.dumps( + {"should_close": False, "reason": "no signal"} + ) from chat.web.kickoff import get_llm_client @@ -165,6 +171,7 @@ def client(tmp_path, monkeypatch): canned_response, canned_state_update, canned_state_update, + canned_scene_close, ] ) app.dependency_overrides[get_llm_client] = lambda: mock diff --git a/tests/test_scene_close.py b/tests/test_scene_close.py new file mode 100644 index 0000000..8447d24 --- /dev/null +++ b/tests/test_scene_close.py @@ -0,0 +1,278 @@ +"""Scene close on hard signals + manual override (T26). + +A small classifier service decides whether the user's prose narrates a +"hard signal" that should close the active scene (container change, +explicit "fade out" / "we're done here" patterns). Wired into the turn +flow AFTER the assistant_turn so the bot's response is the final beat in +the closing scene. The drawer also exposes a manual "Close scene" button +that always fires a ``scene_closed`` event. + +Per Task 26 we DO NOT auto-open a new scene on close — the next +interaction either lives in a fresh chat or operates without an active +scene; the prompt assembler already tolerates ``active_scene == None``. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from chat.app import app +from chat.db.connection import open_db +from chat.eventlog.log import append_event +from chat.eventlog.projector import project +from chat.llm.mock import MockLLMClient +from chat.services.scene_close import detect_scene_close + + +# --------------------------------------------------------------------------- +# Service-level tests (no FastAPI involvement). +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_detect_scene_close_returns_decision(): + canned = json.dumps( + { + "should_close": True, + "reason": "container change", + "new_container_hint": "park", + } + ) + mock = MockLLMClient(canned=[canned]) + decision = await detect_scene_close( + mock, + model="x", + prose="we drove to the park", + current_container_name="office", + ) + assert decision.should_close is True + assert "container" in decision.reason + + +@pytest.mark.asyncio +async def test_detect_scene_close_default_on_failure(): + """Two consecutive non-JSON returns trip the classifier's retry-then-default + path; we should get the safe ``should_close=False`` fallback rather than + crashing the turn flow.""" + mock = MockLLMClient(canned=["nope", "still nope"]) + decision = await detect_scene_close( + mock, + model="x", + prose="anything", + current_container_name="office", + ) + assert decision.should_close is False + + +# --------------------------------------------------------------------------- +# HTTP integration: turn flow + manual close. +# --------------------------------------------------------------------------- + + +@pytest.fixture +def client(tmp_path, monkeypatch): + cfg = tmp_path / "config.toml" + cfg.write_text('featherless_api_key = "test"\n') + monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg)) + db = tmp_path / "test.db" + monkeypatch.setenv("CHAT_DB_PATH", str(db)) + + # Order of canned responses for one POST /turns: + # 1. parse_turn classifier + # 2. narrative streamer + # 3. state_update bot->you + # 4. state_update you->bot + # 5. detect_scene_close (runs AFTER assistant_turn — see turns.py) + parse_canned = json.dumps( + {"segments": [{"kind": "dialogue", "text": "hello"}]} + ) + narrative_canned = "BotA grins." + state_update_canned = json.dumps( + {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} + ) + scene_close_canned = json.dumps( + { + "should_close": True, + "reason": "container change", + "new_container_hint": "park", + } + ) + + from chat.web.kickoff import get_llm_client + + mock = MockLLMClient( + canned=[ + parse_canned, + narrative_canned, + state_update_canned, + state_update_canned, + scene_close_canned, + ] + ) + app.dependency_overrides[get_llm_client] = lambda: mock + + with TestClient(app) as c: + # Same as other turn-flow tests: keep the async significance worker + # off so it doesn't try to call Featherless with the test API key. + app.state.background_worker.enabled = False + yield c + + app.dependency_overrides.clear() + + +def _seed(db_path: Path, *, with_scene: bool = True) -> None: + """Seed enough state for a full turn flow plus an active scene.""" + with open_db(db_path) as conn: + append_event( + conn, + kind="bot_authored", + payload={ + "id": "bot_a", + "name": "BotA", + "persona": "thoughtful, observant", + "voice_samples": [], + "traits": [], + "backstory": "", + "initial_relationship_to_you": "", + "kickoff_prose": "", + }, + ) + append_event( + conn, + kind="chat_created", + payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1", + "weather": "", + }, + ) + append_event( + conn, + kind="container_created", + payload={ + "chat_id": "chat_bot_a", + "name": "office", + "type": "workplace", + "properties": {}, + }, + ) + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": "you", + "posture": "sitting", + "action": { + "verb": "thinking", + "interruptible": True, + "required_attention": "low", + "expected_duration": "ongoing", + }, + "attention": "", + "holding": [], + "status": {}, + }, + ) + append_event( + conn, + kind="activity_change", + payload={ + "entity_id": "bot_a", + "posture": "standing", + "action": { + "verb": "watching", + "interruptible": True, + "required_attention": "low", + "expected_duration": "ongoing", + }, + "attention": "", + "holding": [], + "status": {}, + }, + ) + append_event( + conn, + kind="edge_update", + payload={ + "source_id": "bot_a", + "target_id": "you", + "chat_id": "chat_bot_a", + "knowledge_facts": ["coworker"], + }, + ) + append_event( + conn, + kind="edge_update", + payload={ + "source_id": "you", + "target_id": "bot_a", + "chat_id": "chat_bot_a", + "knowledge_facts": [], + }, + ) + if with_scene: + append_event( + conn, + kind="scene_opened", + payload={ + "chat_id": "chat_bot_a", + "container_id": 1, + "started_at": "2026-04-26T20:00:00+00:00", + "participants": ["you", "bot_a"], + }, + ) + project(conn) + + +def test_post_turn_closes_scene_on_container_change(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post( + "/chats/chat_bot_a/turns", data={"prose": "we drove to the park"} + ) + assert response.status_code == 204 + + with open_db(tmp_path / "test.db") as conn: + # scene_closed event present. + cur = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'" + ) + assert cur.fetchone()[0] == 1 + # Active scene cleared by the projector. + from chat.state.world import active_scene + + assert active_scene(conn, "chat_bot_a") is None + # Order: assistant_turn lands BEFORE scene_closed (the bot's reply is + # the closing scene's final beat). + cur = conn.execute( + "SELECT kind FROM event_log " + "WHERE kind IN ('assistant_turn', 'scene_closed') ORDER BY id" + ) + kinds = [r[0] for r in cur.fetchall()] + assert kinds == ["assistant_turn", "scene_closed"] + + +def test_manual_close_scene_button(client, tmp_path): + _seed(tmp_path / "test.db") + response = client.post("/chats/chat_bot_a/drawer/scene/close") + assert response.status_code == 200 + + with open_db(tmp_path / "test.db") as conn: + cur = conn.execute( + "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'" + ) + assert cur.fetchone()[0] == 1 + from chat.state.world import active_scene + + assert active_scene(conn, "chat_bot_a") is None + + +def test_manual_close_400_when_no_active_scene(client, tmp_path): + _seed(tmp_path / "test.db", with_scene=False) + response = client.post("/chats/chat_bot_a/drawer/scene/close") + assert response.status_code == 400 diff --git a/tests/test_turn_flow.py b/tests/test_turn_flow.py index 44c3bfb..93f315f 100644 --- a/tests/test_turn_flow.py +++ b/tests/test_turn_flow.py @@ -43,6 +43,13 @@ def client(tmp_path, monkeypatch): canned_state_update = json.dumps( {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []} ) + # T26 scene-close detection runs after the state-update pass. These + # tests don't seed an active scene so the classifier is short-circuited + # in turns.py — but the canned slot is harmless to leave in place, + # and adding it documents the order even when the call doesn't fire. + canned_scene_close = json.dumps( + {"should_close": False, "reason": "no signal"} + ) # Import here so env vars are visible to the dependency lookup. from chat.web.kickoff import get_llm_client @@ -53,6 +60,7 @@ def client(tmp_path, monkeypatch): canned_response, canned_state_update, canned_state_update, + canned_scene_close, ] ) app.dependency_overrides[get_llm_client] = lambda: mock