chore: document NICE trim order rationale (T71.3)

T18 review (Phase 1) noted that the NICE-tier trim drops previous-scene FIRST, whereas the §6.3 spec lists previous-scene LAST in the NICE tier group. Decision: keep the existing greedy order (previous-scene first) and document why. Rationale (now in code at the trim ladder): 1. Cheapest-impact-first — dropping a per-POV previous-scene summary loses less narrative continuity than dropping the older dialogue turns or memory hits it competes with. 2. Greedy lookahead is more expensive than the marginal narrative loss: dropping previous-scene typically clears the soft-budget slack in one step. Test added: test_nice_trim_order_documented pins the observed order (previous-scene -> memories -> dialogue) so a future refactor can't silently invert it. The budget is sized so that the all-NICE config overflows soft but dropping just previous-scene fits, which shows that memories and older dialogue turns survive while previous-scene is the FIRST drop.
2026-04-26 17:16:02 -04:00
parent afd1a50958
commit 73bb8c1f17
2 changed files with 165 additions and 0 deletions
@@ -611,6 +611,26 @@ def assemble_narrative_prompt(

    # Drop NICE in order: previous scene → memories beyond top-2 →
    # older dialogue turns (collapse to 4).
+    #
+    # T71.3 — order rationale: the §6.3 spec lists NICE-tier members
+    # with previous-scene LAST, which was read as a literal trim order
+    # during the T18 review. We deliberately keep the greedy order
+    # shown here (previous-scene FIRST) for two reasons:
+    #
+    #   1. Cheapest-impact-first: a per-POV previous-scene summary is
+    #      a single short paragraph that loses very little narrative
+    #      continuity when dropped, while the older dialogue turns it
+    #      is competing with carry the speaker's last few beats — those
+    #      ground the next response far more concretely.
+    #   2. Greedy lookahead is more expensive than the marginal
+    #      narrative loss. Dropping previous-scene typically clears
+    #      the soft-budget slack in one step; trying memories or
+    #      dialogue first would routinely require multiple recompute
+    #      passes through the assembler.
+    #
+    # The pin test test_nice_trim_order_documented locks this order so
+    # a future refactor can't quietly invert it without surfacing the
+    # decision.
    if include_prev:
        include_prev = False
        body, total = _build(
@@ -574,6 +574,151 @@ def test_tight_budget_drops_guest_activity_bullet_first(tmp_path):
    assert "smirking-distinctively" not in body


+def test_nice_trim_order_documented(tmp_path):
+    """T71.3: pin the NICE-tier trim order so a future refactor can't
+    quietly invert it.
+
+    Order under NICE pressure is:
+      1. previous-scene summary (dropped FIRST)
+      2. memories beyond top-2
+      3. older dialogue turns (collapsed to last-4)
+
+    The prompt is assembled twice from the same seeded state:
+
+    * a control pass with a roomy soft budget, which must still carry
+      the previous-scene marker — otherwise the "marker is absent"
+      assertion on the tight pass would hold vacuously if the summary
+      never made it into the prompt in the first place;
+    * a tight pass whose soft budget is sized so that
+      all-NICE-included is over soft, but dropping ONLY previous-scene
+      gets us back under soft.
+
+    The observed behaviour we pin on the tight pass: previous-scene
+    gone, memories/dialogue intact.
+    """
+    db = tmp_path / "t.db"
+    apply_migrations(db)
+    # Heavy previous-scene summary — large enough that dropping it
+    # alone clears the soft-budget overage. Defined out here so the
+    # marker is in scope for the assertions below.
+    prev_scene_blob = "PREVSCENE-MARKER " + ("filler " * 200)
+    with open_db(db) as conn:
+        # Append all events first, project once at the end (project is
+        # not idempotent — it replays every event in the log).
+        from chat.eventlog.log import append_event as _append
+        _append(conn, kind="bot_authored", payload={
+            "id": "bot_a",
+            "name": "Aria",
+            "persona": "reserved coworker who notices things",
+            "voice_samples": ["I — sorry, I didn't mean to."],
+            "traits": ["introverted"],
+            "backstory": "An archivist who joined the firm last spring.",
+            "initial_relationship_to_you": "coworker",
+            "kickoff_prose": "you stay late at the office",
+        })
+        _append(conn, kind="you_authored", payload={
+            "name": "Sam",
+            "pronouns": "they/them",
+            "persona": "tired analyst",
+        })
+        _append(conn, kind="chat_created", payload={
+            "id": "chat_bot_a",
+            "host_bot_id": "bot_a",
+            "guest_bot_id": None,
+            "initial_time": "2026-04-26T20:00:00+00:00",
+            "narrative_anchor": "Day 1 evening",
+            "weather": "clear",
+        })
+        _append(conn, kind="container_created", payload={
+            "chat_id": "chat_bot_a",
+            "name": "office bullpen",
+            "type": "workplace",
+            "properties": {"public": False, "moving": False, "audible_range": "room"},
+        })
+        _append(conn, kind="edge_update", payload={
+            "source_id": "bot_a",
+            "target_id": "you",
+            "affinity_delta": 12,
+            "trust_delta": 5,
+            "knowledge_facts": ["they work on the same floor"],
+        })
+        _append(conn, kind="activity_change", payload={
+            "entity_id": "you",
+            "container_id": 1,
+            "posture": "sitting at your desk",
+            "action": {"verb": "finishing emails"},
+            "attention": "the screen",
+        })
+        _append(conn, kind="activity_change", payload={
+            "entity_id": "bot_a",
+            "container_id": 1,
+            "posture": "sitting at her desk",
+            "action": {"verb": "pretending to work"},
+            "attention": "you, in glances",
+        })
+        _append(conn, kind="scene_opened", payload={
+            "chat_id": "chat_bot_a",
+            "container_id": 1,
+            "started_at": "2026-04-26T20:00:00+00:00",
+            "participants": ["you", "bot_a"],
+        })
+        # Close the seeded scene and write a per-POV summary memory so
+        # _resolve_previous_scene_summary returns a non-empty string.
+        _append(conn, kind="scene_closed", payload={
+            "scene_id": 1,
+            "ended_at": "2026-04-26T20:30:00+00:00",
+            "significance": 2,
+        })
+        _append(conn, kind="memory_written", payload={
+            "owner_id": "bot_a",
+            "chat_id": "chat_bot_a",
+            "scene_id": 1,
+            "pov_summary": prev_scene_blob,
+            "witness_you": 1,
+            "witness_host": 1,
+            "witness_guest": 0,
+            "source": "direct",
+            "reliability": 1.0,
+            "significance": 2,
+        })
+        project(conn)
+
+        # Six dialogue turns — last 4 plus 2 older. If older turns are
+        # dropped under NICE pressure, the unique markers for turns 0/1
+        # disappear; we'll assert they REMAIN to prove dialogue trim
+        # didn't fire.
+        dialogue = [
+            {"speaker": "you", "text": "DLG-OLD-00 hello"},
+            {"speaker": "bot_a", "text": "DLG-OLD-01 hi"},
+            {"speaker": "you", "text": "DLG-LAST-16 ok"},
+            {"speaker": "bot_a", "text": "DLG-LAST-17 sure"},
+            {"speaker": "you", "text": "DLG-LAST-18 night"},
+            {"speaker": "bot_a", "text": "DLG-LAST-19 indeed"},
+        ]
+        # Four small memories — if "memories beyond top-2" trim fires,
+        # MEM-C/MEM-D disappear; we'll assert they REMAIN to prove
+        # memories trim didn't fire either.
+        memories = ["MEM-A short", "MEM-B short", "MEM-C short", "MEM-D short"]
+
+        def _assemble(budget_soft):
+            # Hard is set high on both passes so SHOULD-tier never
+            # trims; only the soft budget differs. Fresh list copies
+            # keep the two passes independent of each other.
+            return assemble_narrative_prompt(
+                conn,
+                chat_id="chat_bot_a",
+                speaker_bot_id="bot_a",
+                recent_dialogue=list(dialogue),
+                retrieved_memory_summaries=list(memories),
+                budget_soft=budget_soft,
+                budget_hard=8000,
+            )
+
+        # Control pass: with a roomy soft budget nothing needs to be
+        # trimmed, so the seeded previous-scene summary must be there.
+        roomy_body = _assemble(7000)[0].content
+        # Tight pass: soft tuned so the all-NICE config (with the heavy
+        # previous scene summary) overflows, but dropping just
+        # previous-scene fits comfortably.
+        body = _assemble(400)[0].content
+    # Guard against a vacuous pass: the summary really was part of the
+    # untrimmed prompt, so its absence below is the trim's doing.
+    assert "PREVSCENE-MARKER" in roomy_body
+    # Previous-scene summary was the FIRST NICE drop — its unique
+    # marker must be absent.
+    assert "PREVSCENE-MARKER" not in body
+    # Memories beyond top-2 stayed (proves memories trim did NOT fire).
+    assert "MEM-A" in body
+    assert "MEM-B" in body
+    assert "MEM-C" in body
+    assert "MEM-D" in body
+    # Older dialogue turns stayed (proves dialogue trim did NOT fire).
+    assert "DLG-OLD-00" in body
+    assert "DLG-OLD-01" in body
+    # Last-4 dialogue turns of course present.
+    assert "DLG-LAST-19" in body
+
+
 def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
    """Under tight budget MUST blocks survive but SHOULD-tier guest
    activity is dropped first."""