5aab98e4d7
The kickoff parse-and-confirm route was 500-ing intermittently because
Hermes-3 + Featherless's response_format={"type":"json_object"} only
guarantees JSON output, NOT a particular schema. The model was inventing
its own field names (sceneTime, entities, settingDetails) instead of
the KickoffParse fields, causing Pydantic validation to fail on both
classify() retries.
Three changes:
1. Include the Pydantic JSON schema in the system prompt so the model
knows exactly which keys to produce. Affects every classify() call
(kickoff parse, turn parse, scene-close detect, significance,
state-update, scene summarize). Strip ```json fences if the model
wraps its output. Bump retries 2 → 3 (model is stochastic; one extra
attempt closes most of the remaining gap).
2. parse_kickoff() now passes a default empty KickoffParse so the
route degrades to a fillable form instead of 500 when the classifier
ultimately fails. The confirm form is the human-in-the-loop; an
empty form is strictly better UX than a stack trace.
3. Tests updated: bumped canned-failure arrays from 2 → 3 entries to
match the new attempt count; renamed kickoff test from
"raises_when_classifier_fails_twice" to
"falls_back_to_empty_when_classifier_fails" reflecting the new
degraded-but-usable behavior.
Verified live with all 3 sample bots (maya/eli/sam) — kickoff route
returns 200 across multiple attempts. Full suite: 168 passed.
261 lines
8.4 KiB
Python
261 lines
8.4 KiB
Python
"""Per-POV summary and edge summary update on scene close (T27).
|
|
|
|
When a scene closes (via the auto-close path in the turn flow or the
manual button in the drawer), we run a classifier that produces a
per-POV summary for each present witness. In Phase 1 (single-bot) that is
only the host bot, since "you" doesn't have a memory store in v1. The
output drives three projected updates:
|
|
|
|
1. Each ``memories`` row for the closed scene owned by the host bot has
|
|
its ``pov_summary`` rewritten via ``manual_edit`` events
|
|
(``target_kind="memory_pov_summary"``) so the field carries a proper
|
|
scene-level summary instead of the per-turn raw narrative seeded by
|
|
T21.
|
|
2. The directed bot->you ``edges.summary`` is updated via a new
|
|
``manual_edit`` target_kind ``edge_summary``. v1 strategy combines
|
|
the prior summary with the classifier's ``relationship_summary``
|
|
field; the LLM is the one phrasing the merge.
|
|
3. Newly-learned facts from the classifier's ``knowledge_facts`` field
|
|
are appended via the existing ``edge_update`` event handler.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from chat.db.connection import open_db
|
|
from chat.db.migrate import apply_migrations
|
|
from chat.eventlog.log import append_event
|
|
from chat.eventlog.projector import project
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.scene_summarize import (
|
|
ScenePOVSummary,
|
|
apply_scene_close_summary,
|
|
summarize_scene,
|
|
)
|
|
|
|
# Importing for handler-registration side effects so the freshly-migrated
|
|
# DB created in each test below has the projector ready.
|
|
import chat.state.edges # noqa: F401
|
|
import chat.state.entities # noqa: F401
|
|
import chat.state.manual_edit # noqa: F401
|
|
import chat.state.memory # noqa: F401
|
|
import chat.state.world # noqa: F401
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Service-level tests — no FastAPI involvement.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_summarize_scene_parses_classifier_output():
    """A well-formed classifier reply is parsed into a ``ScenePOVSummary``."""
    expected_fact = "You like coffee black."
    classifier_reply = {
        "summary": "BotA shared a quiet moment with you in the office.",
        "knowledge_facts": [expected_fact],
        "relationship_summary": "BotA feels closer to you after this conversation.",
    }
    # The mock hands back exactly one canned reply: the valid JSON document.
    llm = MockLLMClient(canned=[json.dumps(classifier_reply)])
    transcript = [
        {"speaker": "Me", "text": "hi"},
        {"speaker": "BotA", "text": "Hello!"},
    ]

    pov = await summarize_scene(
        llm,
        model="x",
        bot_name="BotA",
        bot_persona="thoughtful",
        you_name="Me",
        prior_edge_summary="",
        dialogue=transcript,
    )

    # Every field of the canned reply should surface on the parsed model.
    assert isinstance(pov, ScenePOVSummary)
    assert pov.summary.startswith("BotA shared")
    assert pov.knowledge_facts == [expected_fact]
    assert "closer" in pov.relationship_summary
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_summarize_scene_default_on_failure():
    """Unparseable classifier output yields the empty fallback summary.

    The mock is loaded with three non-JSON replies so that every classifier
    attempt fails (the attempt count was bumped from 2 to 3, and the canned
    failures were bumped to match). Once the attempts are exhausted we
    should get the empty default rather than crashing the close flow.
    """
    # One bad reply per classifier attempt.
    bad_replies = ["bad", "still bad", "bad3"]
    mock = MockLLMClient(canned=bad_replies)

    result = await summarize_scene(
        mock,
        model="x",
        bot_name="BotA",
        bot_persona="",
        you_name="Me",
        prior_edge_summary="",
        dialogue=[],
    )

    # Every field of the fallback is empty.
    assert result.summary == ""
    assert result.knowledge_facts == []
    assert result.relationship_summary == ""
|
|
|
|
|
|
def _seed_single_bot_scene(conn):
    """Append and project the events for one open bot_a/you scene with a memory."""
    # Order matters: later events reference rows created by earlier ones
    # (container 1, scene 1, user turn 1).
    seed_events = [
        (
            "bot_authored",
            {
                "id": "bot_a",
                "name": "BotA",
                "persona": "...",
                "voice_samples": [],
                "traits": [],
                "backstory": "",
                "initial_relationship_to_you": "",
                "kickoff_prose": "",
            },
        ),
        (
            "you_authored",
            {
                "name": "Me",
                "pronouns": "they/them",
                "persona": "engineer",
            },
        ),
        (
            "chat_created",
            {
                "id": "chat_bot_a",
                "host_bot_id": "bot_a",
                "initial_time": "2026-04-26T20:00:00+00:00",
                "narrative_anchor": "Day 1",
                "weather": "",
            },
        ),
        (
            "container_created",
            {
                "chat_id": "chat_bot_a",
                "name": "office",
                "type": "workplace",
                "properties": {},
            },
        ),
        (
            "scene_opened",
            {
                "chat_id": "chat_bot_a",
                "container_id": 1,
                "started_at": "2026-04-26T20:00:00+00:00",
                "participants": ["you", "bot_a"],
            },
        ),
        (
            "edge_update",
            {
                "source_id": "bot_a",
                "target_id": "you",
                "chat_id": "chat_bot_a",
            },
        ),
        (
            "memory_written",
            {
                "owner_id": "bot_a",
                "chat_id": "chat_bot_a",
                "scene_id": 1,
                "pov_summary": "Original raw narrative",
                "witness_you": 1,
                "witness_host": 1,
                "witness_guest": 0,
                "significance": 1,
            },
        ),
        (
            "user_turn",
            {
                "chat_id": "chat_bot_a",
                "prose": "I'm nervous about the deadline",
                "segments": [],
            },
        ),
        (
            "assistant_turn",
            {
                "chat_id": "chat_bot_a",
                "speaker_id": "bot_a",
                "text": "It's going to be okay.",
                "truncated": False,
                "user_turn_id": 1,
            },
        ),
    ]
    for kind, payload in seed_events:
        append_event(conn, kind=kind, payload=payload)
    project(conn)


@pytest.mark.asyncio
async def test_apply_scene_close_summary_updates_memories_and_edge(tmp_path):
    """Closing a scene rewrites the memory POV summary and the bot->you edge.

    Seeds a single-bot chat with one memory row and a short dialogue, feeds
    the classifier a canned summary, then checks that:

    * the returned summary carries the classifier's fields,
    * the ``memories.pov_summary`` row is rewritten through a
      ``manual_edit`` event (``target_kind="memory_pov_summary"``) that
      records the prior value,
    * the edge summary is updated through a ``manual_edit`` event with
      ``target_kind="edge_summary"``,
    * the new knowledge fact lands on the edge.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)
    canned = json.dumps(
        {
            "summary": "BotA reassured you about the project deadline.",
            "knowledge_facts": ["You are nervous about the deadline."],
            "relationship_summary": "BotA showed quiet support.",
        }
    )
    with open_db(db) as conn:
        # Seed bot, you, chat, container, scene, edge, memory, dialogue.
        _seed_single_bot_scene(conn)

        client = MockLLMClient(canned=[canned])
        result = await apply_scene_close_summary(
            conn,
            client,
            classifier_model="x",
            chat_id="chat_bot_a",
            scene_id=1,
            host_bot_id="bot_a",
        )

        # Returned summary plumbs through.
        assert "reassured" in result.summary
        assert result.knowledge_facts == ["You are nervous about the deadline."]

        # Memory pov_summary updated.
        new_pov = conn.execute(
            "SELECT pov_summary FROM memories "
            "WHERE owner_id = 'bot_a' AND scene_id = 1"
        ).fetchone()[0]
        assert "reassured" in new_pov

        # Decode each manual_edit payload exactly once; the checks below
        # all work off the parsed dicts.
        edit_rows = conn.execute(
            "SELECT payload_json FROM event_log WHERE kind = 'manual_edit'"
        ).fetchall()
        edit_payloads = [json.loads(row[0]) for row in edit_rows]

        # And the manual_edit event was logged with prior_value capture.
        pov_edits = [
            payload
            for payload in edit_payloads
            if payload.get("target_kind") == "memory_pov_summary"
        ]
        assert pov_edits
        assert pov_edits[0]["prior_value"] == "Original raw narrative"

        # Edge summary updated via manual_edit (target_kind="edge_summary").
        from chat.state.edges import get_edge

        edge = get_edge(conn, "bot_a", "you")
        assert "support" in edge["summary"]
        assert any(
            payload.get("target_kind") == "edge_summary"
            for payload in edit_payloads
        )

        # Knowledge fact appended via edge_update.
        assert any("deadline" in fact for fact in edge["knowledge"])
|