5aab98e4d7
The kickoff parse-and-confirm route was 500-ing intermittently because
Hermes-3 + Featherless's response_format={"type":"json_object"} only
guarantees JSON output, NOT a particular schema. The model was inventing
its own field names (sceneTime, entities, settingDetails) instead of
the KickoffParse fields, causing Pydantic validation to fail on both
classify() retries.
Three changes:
1. Include the Pydantic JSON schema in the system prompt so the model
knows exactly which keys to produce. Affects every classify() call
(kickoff parse, turn parse, scene-close detect, significance,
state-update, scene summarize). Strip ```json fences if the model
wraps its output. Bump retries 2 → 3 (model is stochastic; one extra
attempt closes most of the remaining gap).
2. parse_kickoff() now passes a default empty KickoffParse so the
route degrades to a fillable form instead of 500 when the classifier
ultimately fails. The confirm form is the human-in-the-loop; an
empty form is strictly better UX than a stack trace.
3. Tests updated: bumped canned-failure arrays from 2 → 3 entries to
match the new attempt count; renamed kickoff test from
"raises_when_classifier_fails_twice" to
"falls_back_to_empty_when_classifier_fails" reflecting the new
degraded-but-usable behavior.
Verified live with all 3 sample bots (maya/eli/sam) — kickoff route
returns 200 across multiple attempts. Full suite: 168 passed.
238 lines
7.5 KiB
Python
238 lines
7.5 KiB
Python
"""Async significance pass with auto-pin on score 3 (T22).
|
|
|
|
After ``assistant_turn`` lands, the turn flow enqueues a SignificanceJob on
a background asyncio worker. The worker calls a classifier (per §11.1,
score 0-3) and writes a ``memory_significance_set`` event. On score 3 the
memory is auto-pinned and a soft cap of 8 pins per owner is enforced —
when the cap is exceeded the oldest auto-pin (excluding the just-pinned
row) is unpinned via another ``memory_pin_changed`` event.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from chat.config import load_settings
|
|
from chat.db.connection import open_db
|
|
from chat.db.migrate import apply_migrations
|
|
from chat.eventlog.log import append_event
|
|
from chat.eventlog.projector import project
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.services.background import BackgroundWorker, SignificanceJob
|
|
from chat.services.significance import compute_significance
|
|
|
|
# Trigger handler registration for projection.
|
|
import chat.state.entities # noqa: F401
|
|
import chat.state.memory # noqa: F401
|
|
import chat.state.world # noqa: F401
|
|
|
|
|
|
async def test_compute_significance_parses_score():
    """A well-formed JSON verdict yields the value of its ``score`` field."""
    verdict = {"score": 2, "reason": "notable"}
    # The mock LLM replays exactly one canned response: the JSON verdict.
    llm = MockLLMClient(canned=[json.dumps(verdict)])
    result = await compute_significance(
        llm, model="x", narrative_text="...", prior_dialogue=[]
    )
    assert result == 2
|
|
|
|
|
|
async def test_compute_significance_default_on_failure():
    """Unparseable classifier output falls back to the default score of 1."""
    # All three canned responses are non-JSON text (one per classify
    # attempt), so every attempt fails and the classify wrapper falls back
    # to the SignificanceVerdict default (score=1, "fallback").
    mock = MockLLMClient(canned=["nope", "still nope", "nope3"])
    score = await compute_significance(
        mock,
        model="x",
        narrative_text="...",
        prior_dialogue=[],
    )
    assert score == 1
|
|
|
|
|
|
async def test_background_worker_processes_job_and_updates_significance(
    tmp_path, monkeypatch
):
    """A score-3 verdict updates significance and auto-pins the memory.

    Seeds one bot, one chat and one memory, pushes a single SignificanceJob
    through the worker with a mock LLM that answers score 3, then inspects
    the resulting ``memories`` row.
    """
    # Point the application at a throwaway config file and database.
    config_file = tmp_path / "config.toml"
    config_file.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(config_file))
    db_file = tmp_path / "test.db"
    monkeypatch.setenv("CHAT_DB_PATH", str(db_file))
    apply_migrations(db_file)
    settings = load_settings()

    bot_payload = {
        "id": "bot_a",
        "name": "BotA",
        "persona": "...",
        "voice_samples": [],
        "traits": [],
        "backstory": "",
        "initial_relationship_to_you": "",
        "kickoff_prose": "",
    }
    chat_payload = {
        "id": "chat_bot_a",
        "host_bot_id": "bot_a",
        "initial_time": "2026-04-26T20:00:00+00:00",
        "narrative_anchor": "Day 1",
        "weather": "",
    }
    memory_payload = {
        "owner_id": "bot_a",
        "chat_id": "chat_bot_a",
        "pov_summary": "Some scene",
        "witness_you": 1,
        "witness_host": 1,
        "witness_guest": 0,
        "source": "direct",
        "reliability": 1.0,
        "significance": 1,
        "pinned": 0,
        "auto_pinned": 0,
    }

    # Seed bot, chat and memory events, then project them into tables.
    with open_db(db_file) as conn:
        append_event(conn, kind="bot_authored", payload=bot_payload)
        append_event(conn, kind="chat_created", payload=chat_payload)
        append_event(conn, kind="memory_written", payload=memory_payload)
        project(conn)
        seeded_row = conn.execute(
            "SELECT id FROM memories WHERE owner_id = 'bot_a'"
        ).fetchone()
        memory_id = seeded_row[0]

    # Worker whose mock LLM always answers score=3 (pivotal).
    pivotal_verdict = json.dumps({"score": 3, "reason": "pivotal"})

    def make_llm():
        # Fresh client (and fresh canned list) on every factory call.
        return MockLLMClient(canned=[pivotal_verdict])

    worker = BackgroundWorker(settings, llm_client_factory=make_llm)
    await worker.start()
    job = SignificanceJob(
        memory_id=memory_id,
        narrative_text="...",
        prior_dialogue=[],
        host_bot_id="bot_a",
    )
    worker.enqueue(job)
    # Drain via stop sentinel — guarantees the prior job completed.
    await worker.stop()

    # The memory must now carry the new score and be auto-pinned.
    with open_db(db_file) as conn:
        result_row = conn.execute(
            "SELECT significance, pinned, auto_pinned FROM memories "
            "WHERE id = ?",
            (memory_id,),
        ).fetchone()
        significance = result_row[0]
        pinned = result_row[1]
        auto_pinned = result_row[2]
        assert significance == 3
        assert pinned == 1
        assert auto_pinned == 1
|
|
|
|
|
|
async def test_auto_pin_evicts_oldest_when_over_cap(tmp_path, monkeypatch):
    """Pin 9 memories with score 3; verify only 8 are pinned at the end."""
    # Point the application at a throwaway config file and database.
    config_file = tmp_path / "config.toml"
    config_file.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(config_file))
    db_file = tmp_path / "test.db"
    monkeypatch.setenv("CHAT_DB_PATH", str(db_file))
    apply_migrations(db_file)
    settings = load_settings()

    bot_payload = {
        "id": "bot_a",
        "name": "BotA",
        "persona": "...",
        "voice_samples": [],
        "traits": [],
        "backstory": "",
        "initial_relationship_to_you": "",
        "kickoff_prose": "",
    }
    chat_payload = {
        "id": "chat_bot_a",
        "host_bot_id": "bot_a",
        "initial_time": "2026-04-26T20:00:00+00:00",
        "narrative_anchor": "Day 1",
        "weather": "",
    }
    # Nine memories: one more than the pin cap of 8 that this test checks.
    memory_payloads = [
        {
            "owner_id": "bot_a",
            "chat_id": "chat_bot_a",
            "pov_summary": f"memory {index}",
            "witness_you": 1,
            "witness_host": 1,
            "witness_guest": 0,
            "source": "direct",
            "reliability": 1.0,
            "significance": 1,
            "pinned": 0,
            "auto_pinned": 0,
        }
        for index in range(9)
    ]

    with open_db(db_file) as conn:
        append_event(conn, kind="bot_authored", payload=bot_payload)
        append_event(conn, kind="chat_created", payload=chat_payload)
        for memory_payload in memory_payloads:
            append_event(conn, kind="memory_written", payload=memory_payload)
        project(conn)
        id_rows = conn.execute(
            "SELECT id FROM memories WHERE owner_id = 'bot_a' ORDER BY id"
        ).fetchall()
        memory_ids = [row[0] for row in id_rows]

    # Each job runs through its own MockLLMClient with one canned response.
    def make_llm():
        return MockLLMClient(
            canned=[json.dumps({"score": 3, "reason": "pivotal"})]
        )

    worker = BackgroundWorker(settings, llm_client_factory=make_llm)
    await worker.start()
    for memory_id in memory_ids:
        job = SignificanceJob(
            memory_id=memory_id,
            narrative_text="...",
            prior_dialogue=[],
            host_bot_id="bot_a",
        )
        worker.enqueue(job)
    # Stopping the worker is what the test relies on to have all jobs done.
    await worker.stop()

    with open_db(db_file) as conn:
        count_row = conn.execute(
            "SELECT COUNT(*) FROM memories "
            "WHERE owner_id = 'bot_a' AND pinned = 1"
        ).fetchone()
        pinned_count = count_row[0]
        assert pinned_count == 8

        # The oldest should have been evicted.
        oldest_id = memory_ids[0]
        oldest_row = conn.execute(
            "SELECT pinned FROM memories WHERE id = ?", (oldest_id,)
        ).fetchone()
        oldest_pinned = oldest_row[0]
        assert oldest_pinned == 0
|