5aab98e4d7
The kickoff parse-and-confirm route was 500-ing intermittently because
Hermes-3 + Featherless's response_format={"type":"json_object"} only
guarantees JSON output, NOT a particular schema. The model was inventing
its own field names (sceneTime, entities, settingDetails) instead of
the KickoffParse fields, causing Pydantic validation to fail on both
classify() retries.
Three changes:
1. Include the Pydantic JSON schema in the system prompt so the model
knows exactly which keys to produce. Affects every classify() call
(kickoff parse, turn parse, scene-close detect, significance,
state-update, scene summarize). Strip ```json fences if the model
wraps its output. Bump retries 2 → 3 (model is stochastic; one extra
attempt closes most of the remaining gap).
2. parse_kickoff() now passes a default empty KickoffParse so the
route degrades to a fillable form instead of 500 when the classifier
ultimately fails. The confirm form is the human-in-the-loop; an
empty form is strictly better UX than a stack trace.
3. Tests updated: bumped canned-failure arrays from 2 → 3 entries to
match the new attempt count; renamed kickoff test from
"raises_when_classifier_fails_twice" to
"falls_back_to_empty_when_classifier_fails" reflecting the new
degraded-but-usable behavior.
Verified live with all 3 sample bots (maya/eli/sam) — kickoff route
returns 200 across multiple attempts. Full suite: 168 passed.
143 lines
5.2 KiB
Python
143 lines
5.2 KiB
Python
import json
|
|
import pytest
|
|
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.kickoff import (
|
|
ActivityShape,
|
|
KickoffParse,
|
|
parse_kickoff,
|
|
)
|
|
|
|
|
|
def _full_kickoff_json() -> str:
    """Return a JSON string for a fully populated kickoff payload.

    The payload fills in every top-level key plus both activity objects,
    including the ``attention`` and ``holding`` entries, so it can stand in
    for a complete classifier reply in the happy-path test.
    """
    return json.dumps(
        {
            "container_name": "office bullpen, late evening",
            "container_type": "office",
            "container_properties": {
                "moving": False,
                "public": False,
                "audible_range": "room",
            },
            "you_activity": {
                "posture": "sitting at your desk",
                "action_verb": "finishing emails",
                "action_interruptible": True,
                "action_required_attention": "low",
                "action_expected_duration": "15m",
                "attention": "the screen",
                "holding": ["coffee mug"],
            },
            "bot_activity": {
                "posture": "sitting at her desk",
                "action_verb": "pretending to work",
                "action_interruptible": True,
                "action_required_attention": "low",
                "action_expected_duration": "indefinite",
                "attention": "you, in glances",
                "holding": [],
            },
            "initial_time_iso": "2026-04-26T19:42:00",
            "edge_seed_summary": "coworkers; aware of each other; no shared history beyond the office",
            "edge_seed_knowledge_facts": [
                "they work on the same floor",
                "it is unusual to be the only two left",
            ],
        }
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_parse_kickoff_happy_path_populates_fields():
    """A complete classifier reply is parsed into a populated KickoffParse."""
    # One canned reply: the fully populated payload built by the helper.
    mock = MockLLMClient(canned=[_full_kickoff_json()])
    result = await parse_kickoff(
        mock,
        model="m",
        bot_name="BotA",
        bot_persona="reserved colleague who quietly notices things",
        initial_relationship_to_you="coworker, slight crush, never voiced",
        kickoff_prose=(
            "you stay late at the office; only you and BotA are there; "
            "she's at her desk pretending to work"
        ),
        you_name="You",
    )

    # Top-level fields come straight from the canned payload.
    assert isinstance(result, KickoffParse)
    assert result.container_name == "office bullpen, late evening"
    assert result.container_type == "office"

    # Nested activity objects are parsed into ActivityShape instances.
    assert isinstance(result.you_activity, ActivityShape)
    assert result.you_activity.posture == "sitting at your desk"
    assert result.bot_activity.action_verb == "pretending to work"

    # Relationship seed data and the scene start time survive the round trip.
    assert result.edge_seed_summary.startswith("coworkers")
    assert "they work on the same floor" in result.edge_seed_knowledge_facts
    assert result.initial_time_iso == "2026-04-26T19:42:00"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_parse_kickoff_applies_activity_defaults_for_missing_fields():
    """Activity fields omitted by the classifier fall back to empty defaults.

    The payload leaves ``attention`` and ``holding`` out of both activity
    objects; the parsed result is expected to expose ``""`` and ``[]`` for
    them, with a separate list per activity.
    """
    minimal_payload = {
        "container_name": "kitchen",
        "container_type": "kitchen",
        "container_properties": {},
        "you_activity": {
            "posture": "standing",
            "action_verb": "boiling water",
            "action_interruptible": True,
            "action_required_attention": "low",
            "action_expected_duration": "5m",
        },
        "bot_activity": {
            "posture": "leaning on the counter",
            "action_verb": "scrolling phone",
            "action_interruptible": True,
            "action_required_attention": "low",
            "action_expected_duration": "10m",
        },
        "initial_time_iso": "2026-04-26T08:00:00",
        "edge_seed_summary": "roommates",
        "edge_seed_knowledge_facts": [],
    }
    mock = MockLLMClient(canned=[json.dumps(minimal_payload)])
    result = await parse_kickoff(
        mock,
        model="m",
        bot_name="BotA",
        bot_persona="laid-back roommate",
        initial_relationship_to_you="roommates of two years",
        kickoff_prose="morning in the kitchen; you're making tea while BotA scrolls her phone",
        you_name="You",
    )

    assert result.you_activity.attention == ""
    assert result.you_activity.holding == []
    assert result.bot_activity.attention == ""
    assert result.bot_activity.holding == []

    # mutating one default must not leak into the other (default_factory check)
    result.you_activity.holding.append("kettle")
    assert result.bot_activity.holding == []
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_parse_kickoff_falls_back_to_empty_when_classifier_fails():
    """When the classifier fails three times, return an empty KickoffParse
    instead of raising — the confirm form lets the user fill in by hand.
    """
    # Three canned replies, none of which is valid JSON — presumably one per
    # classifier attempt, matching the three failures the docstring describes.
    mock = MockLLMClient(canned=["nope", "still nope", "still bad"])
    result = await parse_kickoff(
        mock,
        model="m",
        bot_name="BotA",
        bot_persona="x",
        initial_relationship_to_you="y",
        kickoff_prose="z",
        you_name="You",
    )

    # The fallback is a real KickoffParse whose text/list fields are empty.
    assert isinstance(result, KickoffParse)
    assert result.container_name == ""
    assert result.container_type == ""
    assert result.edge_seed_summary == ""
    assert result.edge_seed_knowledge_facts == []

    # Activity defaults sane (action_interruptible defaults to True so the
    # confirm form's checkbox is in a reasonable initial state).
    assert result.you_activity.action_interruptible is True
    assert result.bot_activity.action_interruptible is True
|