5aab98e4d7
The kickoff parse-and-confirm route was 500-ing intermittently because
Hermes-3 + Featherless's response_format={"type":"json_object"} only
guarantees JSON output, NOT a particular schema. The model was inventing
its own field names (sceneTime, entities, settingDetails) instead of
the KickoffParse fields, causing Pydantic validation to fail on both
classify() retries.
Three changes:
1. Include the Pydantic JSON schema in the system prompt so the model
knows exactly which keys to produce. Affects every classify() call
(kickoff parse, turn parse, scene-close detect, significance,
state-update, scene summarize). Strip ```json fences if the model
wraps its output. Bump retries 2 → 3 (model is stochastic; one extra
attempt closes most of the remaining gap).
2. parse_kickoff() now passes a default empty KickoffParse so the
route degrades to a fillable form instead of 500 when the classifier
ultimately fails. The confirm form is the human-in-the-loop; an
empty form is strictly better UX than a stack trace.
3. Tests updated: bumped canned-failure arrays from 2 → 3 entries to
match the new attempt count; renamed kickoff test from
"raises_when_classifier_fails_twice" to
"falls_back_to_empty_when_classifier_fails" reflecting the new
degraded-but-usable behavior.
Verified live with all 3 sample bots (maya/eli/sam) — kickoff route
returns 200 across multiple attempts. Full suite: 168 passed.
288 lines
9.1 KiB
Python
288 lines
9.1 KiB
Python
"""Scene close on hard signals + manual override (T26).
|
|
|
|
A small classifier service decides whether the user's prose narrates a
|
|
"hard signal" that should close the active scene (container change,
|
|
explicit "fade out" / "we're done here" patterns). Wired into the turn
|
|
flow AFTER the assistant_turn so the bot's response is the final beat in
|
|
the closing scene. The drawer also exposes a manual "Close scene" button
|
|
that always fires a ``scene_closed`` event.
|
|
|
|
Per Task 26 we DO NOT auto-open a new scene on close — the next
|
|
interaction either lives in a fresh chat or operates without an active
|
|
scene; the prompt assembler already tolerates ``active_scene == None``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from chat.app import app
|
|
from chat.db.connection import open_db
|
|
from chat.eventlog.log import append_event
|
|
from chat.eventlog.projector import project
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.scene_close import detect_scene_close
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Service-level tests (no FastAPI involvement).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_detect_scene_close_returns_decision():
    """A well-formed classifier reply is surfaced as the close decision."""
    classifier_reply = {
        "should_close": True,
        "reason": "container change",
        "new_container_hint": "park",
    }
    # Single canned reply: the first classifier attempt parses cleanly.
    llm = MockLLMClient(canned=[json.dumps(classifier_reply)])

    decision = await detect_scene_close(
        llm,
        model="x",
        prose="we drove to the park",
        current_container_name="office",
    )

    assert decision.should_close is True
    assert "container" in decision.reason
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_detect_scene_close_default_on_failure():
    """Unparseable classifier output degrades to ``should_close=False``.

    All three canned replies are non-JSON (one per classifier attempt), so
    the retry-then-default path is taken; the turn flow must receive the safe
    fallback decision rather than crash.
    """
    unparseable_replies = ["nope", "still nope", "nope3"]
    llm = MockLLMClient(canned=unparseable_replies)

    decision = await detect_scene_close(
        llm,
        model="x",
        prose="anything",
        current_container_name="office",
    )

    assert decision.should_close is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP integration: turn flow + manual close.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a ``TestClient`` backed by a temp config/DB and a scripted mock LLM.

    The config file and database live under ``tmp_path`` and are exposed to
    the app through the ``CHAT_CONFIG_PATH`` / ``CHAT_DB_PATH`` environment
    variables. The LLM dependency is overridden with a ``MockLLMClient`` whose
    canned replies are consumed in the order documented below.

    The dependency override is removed in a ``finally`` block so it cannot
    leak into other tests if app startup/shutdown (or the worker toggle)
    raises before or after the ``yield``.
    """
    cfg = tmp_path / "config.toml"
    cfg.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
    db = tmp_path / "test.db"
    monkeypatch.setenv("CHAT_DB_PATH", str(db))

    # Order of canned responses for one POST /turns:
    #   1. parse_turn classifier
    #   2. narrative streamer
    #   3. state_update bot->you
    #   4. state_update you->bot
    #   5. detect_scene_close (runs AFTER assistant_turn — see turns.py)
    #   6. summarize_scene (T27, runs only when scene-close fires)
    parse_canned = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "hello"}]}
    )
    narrative_canned = "BotA grins."
    state_update_canned = json.dumps(
        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
    )
    scene_close_canned = json.dumps(
        {
            "should_close": True,
            "reason": "container change",
            "new_container_hint": "park",
        }
    )
    pov_summary_canned = json.dumps(
        {
            "summary": "BotA noticed you leaving the office.",
            "knowledge_facts": [],
            "relationship_summary": "BotA wonders where you're headed.",
        }
    )

    from chat.web.kickoff import get_llm_client

    mock = MockLLMClient(
        canned=[
            parse_canned,
            narrative_canned,
            state_update_canned,
            state_update_canned,
            scene_close_canned,
            pov_summary_canned,
        ]
    )
    app.dependency_overrides[get_llm_client] = lambda: mock

    try:
        with TestClient(app) as c:
            # Same as other turn-flow tests: keep the async significance worker
            # off so it doesn't try to call Featherless with the test API key.
            app.state.background_worker.enabled = False
            yield c
    finally:
        # Always undo the override: ``app`` is a module-level singleton shared
        # by every test that imports it.
        app.dependency_overrides.clear()
|
|
|
|
|
|
def _seed(db_path: Path, *, with_scene: bool = True) -> None:
    """Populate the event log with the fixtures a full turn flow needs.

    Appends, in order: one bot, one chat hosted by it, an ``office``
    container, an activity for ``you`` and for ``bot_a``, and a relationship
    edge in each direction. When ``with_scene`` is true a ``scene_opened``
    event is appended as well. Finally the projector is run over the log.

    Args:
        db_path: Path of the database file to open and seed.
        with_scene: Whether to also open a scene for the chat.
    """

    def _idle_activity(entity_id, posture, verb):
        # Both seeded entities share the same low-attention, ongoing action
        # shape; only who, posture and verb differ.
        return {
            "entity_id": entity_id,
            "posture": posture,
            "action": {
                "verb": verb,
                "interruptible": True,
                "required_attention": "low",
                "expected_duration": "ongoing",
            },
            "attention": "",
            "holding": [],
            "status": {},
        }

    seed_events = [
        (
            "bot_authored",
            {
                "id": "bot_a",
                "name": "BotA",
                "persona": "thoughtful, observant",
                "voice_samples": [],
                "traits": [],
                "backstory": "",
                "initial_relationship_to_you": "",
                "kickoff_prose": "",
            },
        ),
        (
            "chat_created",
            {
                "id": "chat_bot_a",
                "host_bot_id": "bot_a",
                "initial_time": "2026-04-26T20:00:00+00:00",
                "narrative_anchor": "Day 1",
                "weather": "",
            },
        ),
        (
            "container_created",
            {
                "chat_id": "chat_bot_a",
                "name": "office",
                "type": "workplace",
                "properties": {},
            },
        ),
        ("activity_change", _idle_activity("you", "sitting", "thinking")),
        ("activity_change", _idle_activity("bot_a", "standing", "watching")),
        (
            "edge_update",
            {
                "source_id": "bot_a",
                "target_id": "you",
                "chat_id": "chat_bot_a",
                "knowledge_facts": ["coworker"],
            },
        ),
        (
            "edge_update",
            {
                "source_id": "you",
                "target_id": "bot_a",
                "chat_id": "chat_bot_a",
                "knowledge_facts": [],
            },
        ),
    ]

    if with_scene:
        seed_events.append(
            (
                "scene_opened",
                {
                    "chat_id": "chat_bot_a",
                    # presumably the id assigned to the "office" container
                    # created above — confirm against the projector.
                    "container_id": 1,
                    "started_at": "2026-04-26T20:00:00+00:00",
                    "participants": ["you", "bot_a"],
                },
            )
        )

    with open_db(db_path) as conn:
        for event_kind, event_payload in seed_events:
            append_event(conn, kind=event_kind, payload=event_payload)
        project(conn)
|
|
|
|
|
|
def test_post_turn_closes_scene_on_container_change(client, tmp_path):
    """A turn whose classifier reply says "close" ends the active scene.

    Checks that exactly one ``scene_closed`` event is logged, that no scene
    remains active for the chat, and that the bot's ``assistant_turn`` is
    logged before the close.
    """
    db_path = tmp_path / "test.db"
    _seed(db_path)

    response = client.post(
        "/chats/chat_bot_a/turns", data={"prose": "we drove to the park"}
    )
    assert response.status_code == 204

    with open_db(db_path) as conn:
        # scene_closed event present.
        closed_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
        ).fetchone()[0]
        assert closed_count == 1

        # Active scene cleared by the projector.
        from chat.state.world import active_scene

        assert active_scene(conn, "chat_bot_a") is None

        # Order: assistant_turn lands BEFORE scene_closed (the bot's reply is
        # the closing scene's final beat).
        ordered_rows = conn.execute(
            "SELECT kind FROM event_log "
            "WHERE kind IN ('assistant_turn', 'scene_closed') ORDER BY id"
        ).fetchall()
        assert [row[0] for row in ordered_rows] == [
            "assistant_turn",
            "scene_closed",
        ]
|
|
|
|
|
|
def test_manual_close_scene_button(client, tmp_path):
    """POSTing to the drawer's close endpoint closes the active scene."""
    db_path = tmp_path / "test.db"
    _seed(db_path)

    response = client.post("/chats/chat_bot_a/drawer/scene/close")
    assert response.status_code == 200

    with open_db(db_path) as conn:
        # Exactly one scene_closed event was written by the manual close.
        closed_count = conn.execute(
            "SELECT COUNT(*) FROM event_log WHERE kind = 'scene_closed'"
        ).fetchone()[0]
        assert closed_count == 1

        from chat.state.world import active_scene

        # No scene remains active for the chat afterwards.
        assert active_scene(conn, "chat_bot_a") is None
|
|
|
|
|
|
def test_manual_close_400_when_no_active_scene(client, tmp_path):
    """Manual close is rejected with 400 when no scene was ever opened."""
    db_path = tmp_path / "test.db"
    # Seed everything except the scene_opened event.
    _seed(db_path, with_scene=False)

    response = client.post("/chats/chat_bot_a/drawer/scene/close")

    assert response.status_code == 400
|