feat: cap narrative output at 2-3 beats via trim_to_max_beats post-processor

Verbose roleplay-tuned narrators (Cydonia, Magnum, etc.) reliably ignore prompt-level beat-count instructions and ramble for 6-12 asterisk-action beats per turn — even with HARD CAP language and worked examples in the closing instruction. The fix is a deterministic post-stream trimmer:

- New trim_to_max_beats(text, max_beats) in chat/services/prompt.py. Counts `*` characters in the streamed output (each beat = 2 asterisks: open + close), trims at the start of the (max_beats+1)th asterisk action, strips trailing whitespace. Idempotent and safe on under-cap input.
- Wired into post_turn for both the primary stream (3-beat cap) and the optional interjection stream (2-beat cap — interjections are by definition shorter chime-ins).
- Tightened the closing instruction: explicit "HARD CAP: 2-3 beats" with "After the third beat, STOP". Helps the well-behaved models self-cap; the post-processor catches the rest.
- max_tokens: 250 -> 160 (lets the 3rd beat finish naturally before hitting the physical cap; trim_to_max_beats handles 4+ beat overflow). temperature: 0.85 -> 0.7 (Cydonia is more compliant with format instructions at slightly cooler sampling).
- Test budgets bumped (closing grew ~15 tokens with the new wording).

6 new tests for trim_to_max_beats covering passthrough, exact-cap, 4-beat trim, 6-beat runaway, lower caps, zero cap.

Verified live: 4-turn bench against chat_maya, every response is 2-3 beats consistently. Suite: 470 passed in 11.7s.
2026-04-27 14:19:21 -04:00
parent a902d86432
commit 3b83786b8b
4 changed files with 116 additions and 22 deletions
@@ -21,7 +21,11 @@ import chat.state.world  # noqa: F401
 import chat.state.events  # noqa: F401
 import chat.state.threads  # noqa: F401
 from chat.llm.client import Message
-from chat.services.prompt import _witness_role_for, assemble_narrative_prompt
+from chat.services.prompt import (
+    _witness_role_for,
+    assemble_narrative_prompt,
+    trim_to_max_beats,
+)


 def _seed_basic(conn) -> None:
@@ -569,8 +573,8 @@ def test_tight_budget_drops_guest_activity_bullet_first(tmp_path):
            # (Phase 4.6 narrative-style fix). Budget bumped enough to
            # accommodate the larger MUST floor while still exercising
            # the SHOULD-tier trim path.
-            budget_soft=440,
-            budget_hard=460,
+            budget_soft=480,
+            budget_hard=510,
        )
    body = msgs[0].content
    # Speaker bullet survives (MUST-tier floor).
@@ -758,8 +762,8 @@ def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
            # (Phase 4.6 narrative-style fix). Budget bumped enough to
            # accommodate the larger MUST floor while still exercising
            # the SHOULD-tier trim path.
-            budget_soft=440,
-            budget_hard=460,
+            budget_soft=480,
+            budget_hard=510,
        )
    body = msgs[0].content
    # MUST: speaker identity, edge to addressee, last 4 dialogue turns.
@@ -773,7 +777,7 @@ def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
    # instruction that ships the asterisk-format spec.
    import tiktoken
    enc = tiktoken.get_encoding("cl100k_base")
-    assert len(enc.encode(body)) <= 460
+    assert len(enc.encode(body)) <= 510


 # ---------------------------------------------------------------------------
@@ -870,3 +874,44 @@ def test_witness_role_for_none_host_returns_host():
    # Sanity check: existing semantics preserved.
    assert _witness_role_for("bot_a", "bot_a") == "host"
    assert _witness_role_for("bot_a", "bot_b") == "guest"
+
+
+# ---------------------------------------------------------------------------
+# trim_to_max_beats — caps verbose narrative output to N beats
+# ---------------------------------------------------------------------------
+
+
+def test_trim_to_max_beats_passthrough_when_under_cap():
+    assert trim_to_max_beats("", 3) == ""
+    assert trim_to_max_beats("plain text", 3) == "plain text"
+    two = "*She nods* okay. *She turns* see you."
+    assert trim_to_max_beats(two, 3) == two
+
+
+def test_trim_to_max_beats_passthrough_at_exactly_cap():
+    three = "*A* one. *B* two. *C* three."
+    assert trim_to_max_beats(three, 3) == three
+
+
+def test_trim_to_max_beats_cuts_at_fourth_beat():
+    """Cydonia-style 4-beat output trimmed at the start of the 4th
+    asterisk action; trailing whitespace stripped."""
+    four = "*A* one. *B* two. *C* three. *D* four."
+    assert trim_to_max_beats(four, 3) == "*A* one. *B* two. *C* three."
+
+
+def test_trim_to_max_beats_handles_runaway_six_beats():
+    """The exact failure mode that motivated this — verbose narrator
+    rambling for 6 beats when the prompt asked for 2-3."""
+    six = "*A* 1 *B* 2 *C* 3 *D* 4 *E* 5 *F* 6"
+    assert trim_to_max_beats(six, 3) == "*A* 1 *B* 2 *C* 3"
+
+
+def test_trim_to_max_beats_respects_lower_cap():
+    four = "*A* one. *B* two. *C* three. *D* four."
+    assert trim_to_max_beats(four, 2) == "*A* one. *B* two."
+    assert trim_to_max_beats(four, 1) == "*A* one."
+
+
+def test_trim_to_max_beats_zero_returns_empty():
+    assert trim_to_max_beats("*A* one. *B* two.", 0) == ""