diff --git a/chat/services/prompt.py b/chat/services/prompt.py index c820136..6e6d72c 100644 --- a/chat/services/prompt.py +++ b/chat/services/prompt.py @@ -611,6 +611,26 @@ def assemble_narrative_prompt( # Drop NICE in order: previous scene → memories beyond top-2 → # older dialogue turns (collapse to 4). + # + # T71.3 — order rationale: the §6.3 spec lists NICE-tier members + # with previous-scene LAST, which was read as a literal trim order + # during T18 review. We deliberately keep the greedy order shown + # here (previous-scene FIRST) for two reasons: + # + # 1. Cheapest-impact-first: a per-POV previous-scene summary is + # a single short paragraph that loses very little narrative + # continuity when dropped, while the older dialogue turns that + # compete with it carry the speaker's last few beats — those + # ground the next response far more concretely. + # 2. Greedy lookahead is more expensive than the marginal + # narrative loss. Dropping previous-scene typically clears + # the soft-budget slack in one step; trying memories or + # dialogue first would routinely require multiple recompute + # passes through the assembler. + # + # The pin test test_nice_trim_order_documented locks the first step + # (previous-scene dropped before memories or dialogue) so a future + # refactor can't quietly invert it without surfacing the decision. if include_prev: include_prev = False body, total = _build( diff --git a/tests/test_prompt.py b/tests/test_prompt.py index 322ddab..f50fdea 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -574,6 +574,151 @@ def test_tight_budget_drops_guest_activity_bullet_first(tmp_path): assert "smirking-distinctively" not in body +def test_nice_trim_order_documented(tmp_path): + """T71.3: pin the NICE-tier trim order so a future refactor can't + quietly invert it. + + Order under NICE pressure is: + 1. previous-scene summary (dropped FIRST) + 2. memories beyond top-2 + 3. 
older dialogue turns (collapsed to last-4) + + We size the budget so that all-NICE-included is over soft, but + dropping ONLY previous-scene gets us back under soft. The observed + behaviour we pin: previous-scene gone, memories/dialogue intact. + """ + db = tmp_path / "t.db" + apply_migrations(db) + # Heavy previous-scene summary — large enough that dropping it + # alone clears the soft-budget overage. The PREVSCENE-MARKER + # prefix is the unique string the assertions below look for. + prev_scene_blob = "PREVSCENE-MARKER " + ("filler " * 200) + with open_db(db) as conn: + # Append all events first, project once at the end (project is + # not idempotent — it replays every event in the log). + from chat.eventlog.log import append_event as _append + _append(conn, kind="bot_authored", payload={ + "id": "bot_a", + "name": "Aria", + "persona": "reserved coworker who notices things", + "voice_samples": ["I — sorry, I didn't mean to."], + "traits": ["introverted"], + "backstory": "An archivist who joined the firm last spring.", + "initial_relationship_to_you": "coworker", + "kickoff_prose": "you stay late at the office", + }) + _append(conn, kind="you_authored", payload={ + "name": "Sam", + "pronouns": "they/them", + "persona": "tired analyst", + }) + _append(conn, kind="chat_created", payload={ + "id": "chat_bot_a", + "host_bot_id": "bot_a", + "guest_bot_id": None, + "initial_time": "2026-04-26T20:00:00+00:00", + "narrative_anchor": "Day 1 evening", + "weather": "clear", + }) + _append(conn, kind="container_created", payload={ + "chat_id": "chat_bot_a", + "name": "office bullpen", + "type": "workplace", + "properties": {"public": False, "moving": False, "audible_range": "room"}, + }) + _append(conn, kind="edge_update", payload={ + "source_id": "bot_a", + "target_id": "you", + "affinity_delta": 12, + "trust_delta": 5, + "knowledge_facts": ["they work on the same floor"], + }) + _append(conn, kind="activity_change", payload={ + "entity_id": "you", + "container_id": 1, + "posture": 
"sitting at your desk", + "action": {"verb": "finishing emails"}, + "attention": "the screen", + }) + _append(conn, kind="activity_change", payload={ + "entity_id": "bot_a", + "container_id": 1, + "posture": "sitting at her desk", + "action": {"verb": "pretending to work"}, + "attention": "you, in glances", + }) + _append(conn, kind="scene_opened", payload={ + "chat_id": "chat_bot_a", + "container_id": 1, + "started_at": "2026-04-26T20:00:00+00:00", + "participants": ["you", "bot_a"], + }) + # Close the seeded scene and write a per-POV summary memory so + # _resolve_previous_scene_summary returns a non-empty string. + _append(conn, kind="scene_closed", payload={ + "scene_id": 1, + "ended_at": "2026-04-26T20:30:00+00:00", + "significance": 2, + }) + _append(conn, kind="memory_written", payload={ + "owner_id": "bot_a", + "chat_id": "chat_bot_a", + "scene_id": 1, + "pov_summary": prev_scene_blob, + "witness_you": 1, + "witness_host": 1, + "witness_guest": 0, + "source": "direct", + "reliability": 1.0, + "significance": 2, + }) + project(conn) + + # Six dialogue turns — last 4 plus 2 older. If older turns are + # dropped under NICE pressure, the unique markers for turns 0/1 + # disappear; we'll assert they REMAIN to prove dialogue trim + # didn't fire. + dialogue = [ + {"speaker": "you", "text": "DLG-OLD-00 hello"}, + {"speaker": "bot_a", "text": "DLG-OLD-01 hi"}, + {"speaker": "you", "text": "DLG-LAST-16 ok"}, + {"speaker": "bot_a", "text": "DLG-LAST-17 sure"}, + {"speaker": "you", "text": "DLG-LAST-18 night"}, + {"speaker": "bot_a", "text": "DLG-LAST-19 indeed"}, + ] + # Four small memories — if "memories beyond top-2" trim fires, + # MEM-C/MEM-D disappear; we'll assert they REMAIN to prove + # memories trim didn't fire either. + memories = ["MEM-A short", "MEM-B short", "MEM-C short", "MEM-D short"] + + # Soft tuned so the all-NICE config (with the heavy previous + # scene summary) overflows, but dropping just previous-scene + # fits comfortably. 
Hard set high so SHOULD-tier never trims. + msgs = assemble_narrative_prompt( + conn, + chat_id="chat_bot_a", + speaker_bot_id="bot_a", + recent_dialogue=dialogue, + retrieved_memory_summaries=memories, + budget_soft=400, + budget_hard=8000, + ) + body = msgs[0].content + # Previous-scene summary was the FIRST NICE drop — its unique + # marker must be absent. + assert "PREVSCENE-MARKER" not in body + # Memories beyond top-2 stayed (proves memories trim did NOT fire). + assert "MEM-A" in body + assert "MEM-B" in body + assert "MEM-C" in body + assert "MEM-D" in body + # Older dialogue turns stayed (proves dialogue trim did NOT fire). + assert "DLG-OLD-00" in body + assert "DLG-OLD-01" in body + # Spot-check the last-4 window via its final turn only. + assert "DLG-LAST-19" in body + + def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path): """Under tight budget MUST blocks survive but SHOULD-tier guest activity is dropped first."""