feat: scene close on hard signals with manual override

This commit is contained in:
Joseph Doherty
2026-04-26 13:46:14 -04:00
parent db3005fc17
commit 0997562e75
7 changed files with 482 additions and 1 deletions
+100
View File
@@ -0,0 +1,100 @@
"""Scene-close hard-signal detection (T26).
A small classifier service that decides whether the user's prose narrates
a hard signal that should close the active scene. Hard signals (per
Requirements §7.2):
* Container change parsed from prose ("we drove to the park", "we stepped
outside").
* Explicit user pattern signaling end ("we're done here", "fade out",
"scene end").
NOT close signals:
* Brief activity changes within the same container ("I sit down").
* Future plans ("let's go to the park later").
The service returns a :class:`SceneCloseDecision`. The default on classifier
failure is ``should_close=False`` so the turn flow keeps moving — closing
on a misfire would be more disruptive than missing a real signal, and the
manual button in the drawer is always available as a fallback.
Phase 2/3 will introduce automatic re-opening with the new container; for
T26 the close is one-way and the next user turn operates without an active
scene (the prompt assembler already tolerates this).
"""
from __future__ import annotations
from pydantic import BaseModel
from chat.llm.classify import classify
from chat.llm.client import LLMClient
class SceneCloseDecision(BaseModel):
"""Classifier verdict for scene-close detection.
``new_container_hint`` is captured opportunistically when the close
signal is a container change, but T26 doesn't act on it — Phase 2/3
handles automatic re-opening at the new location.
"""
should_close: bool = False
reason: str = ""
new_container_hint: str = ""
_SYSTEM = (
"You decide whether a roleplay scene should close based on the user's "
"prose.\n"
"Close signals (return should_close=true):\n"
"- The prose narrates a CONTAINER CHANGE (moving to a different place, "
'e.g. "we drove to the park", "we stepped outside").\n'
"- The prose has an EXPLICIT USER PATTERN signaling end "
'("we\'re done here", "fade out", "scene end").\n'
"\n"
"DO NOT close on:\n"
"- Brief activity changes within the same place "
'(e.g. "I sit down" — same room).\n'
"- Future plans "
'("let\'s go to the park later" — not yet).\n'
"\n"
'Reply JSON: {"should_close": bool, "reason": str (short), '
'"new_container_hint": str (optional name)}.'
)
async def detect_scene_close(
client: LLMClient,
*,
model: str,
prose: str,
current_container_name: str,
timeout_s: float = 10.0,
) -> SceneCloseDecision:
"""Run the scene-close classifier on a single user turn.
The current container name is passed in so the prompt can reason about
"different place" relative to the active scene rather than guessing.
On classifier failure (parse error twice), the returned decision is the
safe ``should_close=False`` default.
"""
user = (
f"CURRENT CONTAINER: {current_container_name}\n"
f"\n"
f"PROSE:\n{prose}\n"
f"\n"
f"Decide whether to close the scene."
)
return await classify(
client,
model=model,
system=_SYSTEM,
user=user,
schema=SceneCloseDecision,
default=SceneCloseDecision(
should_close=False, reason="fallback", new_container_hint=""
),
timeout_s=timeout_s,
)
+9
View File
@@ -16,6 +16,15 @@
<p class="muted">No active container.</p>
{% endif %}
<p>Time: {{ chat.time }}</p>
{% if scene %}
<form class="inline-edit"
hx-post="/chats/{{ chat.id }}/drawer/scene/close"
hx-target="#drawer" hx-swap="innerHTML">
<button type="submit">Close scene</button>
</form>
{% else %}
<p class="muted">No active scene.</p>
{% endif %}
</section>
<section class="drawer-section">
+40
View File
@@ -135,6 +135,46 @@ async def drawer(chat_id: str, request: Request, conn=Depends(get_conn)):
# HTMX swaps into ``#drawer``.
@router.post(
"/chats/{chat_id}/drawer/scene/close",
response_class=HTMLResponse,
)
async def close_scene_manual(
chat_id: str,
request: Request,
conn=Depends(get_conn),
):
"""Manual scene close from the drawer button.
Always available when there's an active scene; mirrors the auto-close
path in the turn flow but bypasses the classifier. Returns the refreshed
drawer partial so HTMX swaps it in. ``400`` when no scene is active —
the button is hidden in that state but a stale tab might still POST.
"""
chat = get_chat(conn, chat_id)
if chat is None:
raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}")
scene = active_scene(conn, chat_id)
if scene is None:
raise HTTPException(
status_code=400, detail="no active scene to close"
)
append_and_apply(
conn,
kind="scene_closed",
payload={
"scene_id": scene["id"],
"ended_at": chat.get("time"),
# T27 will set this from the per-POV summary pass; for T26 we
# default to 0 so the projector update has a value to write.
"significance": 0,
},
)
return await drawer(chat_id, request, conn)
@router.post(
"/chats/{chat_id}/drawer/edge/{source_id}/{target_id}/affinity",
response_class=HTMLResponse,
+40 -1
View File
@@ -42,11 +42,12 @@ from chat.eventlog.log import append_and_apply, append_event
from chat.services.background import SignificanceJob
from chat.services.memory_write import record_turn_memory
from chat.services.prompt import assemble_narrative_prompt
from chat.services.scene_close import detect_scene_close
from chat.services.state_update import compute_state_update
from chat.services.turn_parse import ParsedTurn, parse_turn
from chat.state.edges import get_edge
from chat.state.entities import get_bot, get_you
from chat.state.world import active_scene, get_chat
from chat.state.world import active_scene, get_chat, get_container
from chat.web.bots import get_conn
from chat.web.kickoff import get_llm_client
from chat.web.pubsub import publish
@@ -331,6 +332,44 @@ async def post_turn(
)
)
# 6d. Scene-close detection (Plan §7.2, T26). Runs AFTER assistant_turn
# so the bot's response is the closing scene's final beat — closing
# before narrative would force the bot to speak "in no scene", which
# is awkward. Hard signals only in Phase 1: container change parsed
# from prose, or explicit "fade out" / "we're done here" patterns.
# On classifier failure the service returns ``should_close=False``
# so the turn flow keeps moving; the manual close button in the
# drawer is the always-available fallback.
#
# Skip empty prose — no signal to classify and no point spending a
# round-trip. Skip when there's no active scene (e.g. after a prior
# close in the same chat) — we have nothing to close. T13 (kickoff)
# is the only scene-opener path in v1; Phase 2-3 will handle
# automatic re-opening with the next container.
if scene is not None and prose.strip():
container = None
if scene.get("container_id") is not None:
container = get_container(conn, scene["container_id"])
container_name = container["name"] if container else "unknown"
decision = await detect_scene_close(
client,
model=settings.classifier_model,
prose=prose,
current_container_name=container_name,
)
if decision.should_close:
append_and_apply(
conn,
kind="scene_closed",
payload={
"scene_id": scene["id"],
"ended_at": chat.get("time"),
# T27 will set significance from the per-POV summary
# pass; T26 just emits the close event with a default.
"significance": 0,
},
)
# 7. Broadcast a JSON completion event (for JS consumers) and an HTML
# fragment event (for HTMX SSE swap-into-timeline).
await publish(
+7
View File
@@ -156,6 +156,12 @@ def client(tmp_path, monkeypatch):
canned_state_update = json.dumps(
{"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
)
# T26 scene-close detection runs after the state-update pass. ``_seed_full``
# below doesn't open a scene so the classifier call is short-circuited in
# turns.py — but the canned slot stays in place to document the order.
canned_scene_close = json.dumps(
{"should_close": False, "reason": "no signal"}
)
from chat.web.kickoff import get_llm_client
@@ -165,6 +171,7 @@ def client(tmp_path, monkeypatch):
canned_response,
canned_state_update,
canned_state_update,
canned_scene_close,
]
)
app.dependency_overrides[get_llm_client] = lambda: mock
+278
View File
@@ -0,0 +1,278 @@
"""Scene close on hard signals + manual override (T26).
A small classifier service decides whether the user's prose narrates a
"hard signal" that should close the active scene (container change,
explicit "fade out" / "we're done here" patterns). Wired into the turn
flow AFTER the assistant_turn so the bot's response is the final beat in
the closing scene. The drawer also exposes a manual "Close scene" button
that always fires a ``scene_closed`` event.
Per Task 26 we DO NOT auto-open a new scene on close — the next
interaction either lives in a fresh chat or operates without an active
scene; the prompt assembler already tolerates ``active_scene == None``.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from chat.app import app
from chat.db.connection import open_db
from chat.eventlog.log import append_event
from chat.eventlog.projector import project
from chat.llm.mock import MockLLMClient
from chat.services.scene_close import detect_scene_close
# ---------------------------------------------------------------------------
# Service-level tests (no FastAPI involvement).
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_detect_scene_close_returns_decision():
canned = json.dumps(
{
"should_close": True,
"reason": "container change",
"new_container_hint": "park",
}
)
mock = MockLLMClient(canned=[canned])
decision = await detect_scene_close(
mock,
model="x",
prose="we drove to the park",
current_container_name="office",
)
assert decision.should_close is True
assert "container" in decision.reason
@pytest.mark.asyncio
async def test_detect_scene_close_default_on_failure():
"""Two consecutive non-JSON returns trip the classifier's retry-then-default
path; we should get the safe ``should_close=False`` fallback rather than
crashing the turn flow."""
mock = MockLLMClient(canned=["nope", "still nope"])
decision = await detect_scene_close(
mock,
model="x",
prose="anything",
current_container_name="office",
)
assert decision.should_close is False
# ---------------------------------------------------------------------------
# HTTP integration: turn flow + manual close.
# ---------------------------------------------------------------------------
@pytest.fixture
def client(tmp_path, monkeypatch):
cfg = tmp_path / "config.toml"
cfg.write_text('featherless_api_key = "test"\n')
monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
db = tmp_path / "test.db"
monkeypatch.setenv("CHAT_DB_PATH", str(db))
# Order of canned responses for one POST /turns:
# 1. parse_turn classifier
# 2. narrative streamer
# 3. state_update bot->you
# 4. state_update you->bot
# 5. detect_scene_close (runs AFTER assistant_turn — see turns.py)
parse_canned = json.dumps(
{"segments": [{"kind": "dialogue", "text": "hello"}]}
)
narrative_canned = "BotA grins."
state_update_canned = json.dumps(
{"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
)
scene_close_canned = json.dumps(
{
"should_close": True,
"reason": "container change",
"new_container_hint": "park",
}
)
from chat.web.kickoff import get_llm_client
mock = MockLLMClient(
canned=[
parse_canned,
narrative_canned,
state_update_canned,
state_update_canned,
scene_close_canned,
]
)
app.dependency_overrides[get_llm_client] = lambda: mock
with TestClient(app) as c:
# Same as other turn-flow tests: keep the async significance worker
# off so it doesn't try to call Featherless with the test API key.
app.state.background_worker.enabled = False
yield c
app.dependency_overrides.clear()
def _seed(db_path: Path, *, with_scene: bool = True) -> None:
"""Seed enough state for a full turn flow plus an active scene."""
with open_db(db_path) as conn:
append_event(
conn,
kind="bot_authored",
payload={
"id": "bot_a",
"name": "BotA",
"persona": "thoughtful, observant",
"voice_samples": [],
"traits": [],
"backstory": "",
"initial_relationship_to_you": "",
"kickoff_prose": "",
},
)
append_event(
conn,
kind="chat_created",
payload={
"id": "chat_bot_a",
"host_bot_id": "bot_a",
"initial_time": "2026-04-26T20:00:00+00:00",
"narrative_anchor": "Day 1",
"weather": "",
},
)
append_event(
conn,
kind="container_created",
payload={
"chat_id": "chat_bot_a",
"name": "office",
"type": "workplace",
"properties": {},
},
)
append_event(
conn,
kind="activity_change",
payload={
"entity_id": "you",
"posture": "sitting",
"action": {
"verb": "thinking",
"interruptible": True,
"required_attention": "low",
"expected_duration": "ongoing",
},
"attention": "",
"holding": [],
"status": {},
},
)
append_event(
conn,
kind="activity_change",
payload={
"entity_id": "bot_a",
"posture": "standing",
"action": {
"verb": "watching",
"interruptible": True,
"required_attention": "low",
"expected_duration": "ongoing",
},
"attention": "",
"holding": [],
"status": {},
},
)
append_event(
conn,
kind="edge_update",
payload={
"source_id": "bot_a",
"target_id": "you",
"chat_id": "chat_bot_a",
"knowledge_facts": ["coworker"],
},
)
append_event(
conn,
kind="edge_update",
payload={
"source_id": "you",
"target_id": "bot_a",
"chat_id": "chat_bot_a",
"knowledge_facts": [],
},
)
if with_scene:
append_event(
conn,
kind="scene_opened",
payload={
"chat_id": "chat_bot_a",
"container_id": 1,
"started_at": "2026-04-26T20:00:00+00:00",
"participants": ["you", "bot_a"],
},
)
project(conn)
def test_post_turn_closes_scene_on_container_change(client, tmp_path):
_seed(tmp_path / "test.db")
response = client.post(
"/chats/chat_bot_a/turns", data={"prose": "we drove to the park"}
)
assert response.status_code == 204
with open_db(tmp_path / "test.db") as conn:
# scene_closed event present.
cur = conn.execute(
"SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
)
assert cur.fetchone()[0] == 1
# Active scene cleared by the projector.
from chat.state.world import active_scene
assert active_scene(conn, "chat_bot_a") is None
# Order: assistant_turn lands BEFORE scene_closed (the bot's reply is
# the closing scene's final beat).
cur = conn.execute(
"SELECT kind FROM event_log "
"WHERE kind IN ('assistant_turn', 'scene_closed') ORDER BY id"
)
kinds = [r[0] for r in cur.fetchall()]
assert kinds == ["assistant_turn", "scene_closed"]
def test_manual_close_scene_button(client, tmp_path):
_seed(tmp_path / "test.db")
response = client.post("/chats/chat_bot_a/drawer/scene/close")
assert response.status_code == 200
with open_db(tmp_path / "test.db") as conn:
cur = conn.execute(
"SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
)
assert cur.fetchone()[0] == 1
from chat.state.world import active_scene
assert active_scene(conn, "chat_bot_a") is None
def test_manual_close_400_when_no_active_scene(client, tmp_path):
_seed(tmp_path / "test.db", with_scene=False)
response = client.post("/chats/chat_bot_a/drawer/scene/close")
assert response.status_code == 400
+8
View File
@@ -43,6 +43,13 @@ def client(tmp_path, monkeypatch):
canned_state_update = json.dumps(
{"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
)
# T26 scene-close detection runs after the state-update pass. These
# tests don't seed an active scene so the classifier is short-circuited
# in turns.py — but the canned slot is harmless to leave in place,
# and adding it documents the order even when the call doesn't fire.
canned_scene_close = json.dumps(
{"should_close": False, "reason": "no signal"}
)
# Import here so env vars are visible to the dependency lookup.
from chat.web.kickoff import get_llm_client
@@ -53,6 +60,7 @@ def client(tmp_path, monkeypatch):
canned_response,
canned_state_update,
canned_state_update,
canned_scene_close,
]
)
app.dependency_overrides[get_llm_client] = lambda: mock