diff --git a/chat/config.py b/chat/config.py
index 332c95c..1af77ca 100644
--- a/chat/config.py
+++ b/chat/config.py
@@ -25,15 +25,20 @@ class Settings(BaseModel):
     narrative_budget_soft: int = 6000
     # Cap on each generated bot response. The asterisk-action format
     # (see ``_closing_instruction`` in chat/services/prompt.py) targets
-    # 2-4 short interleaved action+dialogue beats; ~250 tokens fits that
-    # without leaving room for the model to drift into multi-paragraph
-    # inner-monologue prose. Bump back up if you want longer scenes;
-    # drop to 150 for very terse banter.
-    narrative_max_tokens: int = 250
+    # 2-3 short interleaved action+dialogue beats. Verbose roleplay
+    # narrators (Cydonia, Magnum) ignore the prompt's cap and keep
+    # going; ``trim_to_max_beats`` in chat/services/prompt.py handles
+    # the actual cap by trimming at a beat boundary post-stream. This
+    # max_tokens setting just gives the third beat enough room to
+    # complete naturally before max_tokens cuts mid-action: 160 fits
+    # 3 substantive beats with margin. Bump to 250 for longer scenes;
+    # drop to 80 for terse banter.
+    narrative_max_tokens: int = 160
     # Sampling temperature for narrative generation. 0.7 = grounded /
-    # consistent; 0.85 = creative-but-in-character (default); 1.0 = wide
-    # variety, can drift; >1.0 = often off-the-rails.
-    narrative_temperature: float = 0.85
+    # instruction-compliant (current — Cydonia is verbose-by-default and
+    # a lower temperature helps it respect the 2-3-beat cap);
+    # 0.85 = creative; 1.0 = wide variety; >1.0 = often off-the-rails.
+    narrative_temperature: float = 0.7
     classifier_budget_hard: int = 4000
     classifier_timeout_s: float = 30.0
     # Featherless free tier and lower paid tiers cap concurrent connections.
diff --git a/chat/services/prompt.py b/chat/services/prompt.py
index cae9fee..e313337 100644
--- a/chat/services/prompt.py
+++ b/chat/services/prompt.py
@@ -325,6 +325,36 @@ def _build_open_threads_block(threads: list[dict]) -> str | None:
     return "\n".join(lines)
 
 
+def trim_to_max_beats(text: str, max_beats: int = 3) -> str:
+    """Truncate ``text`` to at most ``max_beats`` asterisk-action beats.
+
+    A "beat" is one ``*action*`` markdown-italic block plus the dialogue
+    that follows it. Every ``*`` is counted: a complete beat contributes
+    two (open + close), so the (2*max_beats + 1)th asterisk opens an
+    over-the-cap beat; the text is cut just before it and right-stripped.
+
+    NOTE(review): any ``*`` counts, not only action delimiters, so bold
+    markup or a stray asterisk in dialogue shifts the cut point.
+
+    Backstop for verbose roleplay-tuned narrators (Cydonia, Magnum, etc.)
+    that ignore "HARD CAP: 2-3 beats" prompt instructions: a max_tokens
+    cap helps but truncates mid-word, this trims at a beat boundary.
+
+    Returns ``""`` when ``max_beats <= 0``. Text within the cap is
+    returned unchanged, which also makes the call idempotent.
+    """
+    if max_beats <= 0:
+        return ""
+    target = max_beats * 2  # asterisks allowed: open + close per beat
+    count = 0
+    for i, ch in enumerate(text):
+        if ch == "*":
+            count += 1
+            if count > target:  # first asterisk past the cap
+                return text[:i].rstrip()  # drop it and the gap before it
+    return text
+
+
 def _closing_instruction(speaker_name: str, addressee_name: str) -> str:
     return (
         f"Continue as {speaker_name}. Format strictly:\n"
@@ -333,17 +363,21 @@ def _closing_instruction(speaker_name: str, addressee_name: str) -> str:
         "thoughts inside asterisks.\n"
         "- Speak dialogue as plain text between action beats, no quote "
         "marks. Keep speech fragmented, not paragraphs.\n"
-        "- Interleave 2-4 short beats (action, brief speech, action, brief "
-        "speech). Each beat is one concrete gesture or sensory image — no "
+        "- HARD CAP: 2-3 beats per response. A beat is one *asterisk "
+        "action* paired with a short dialogue fragment. After the "
+        "third beat, STOP — do not add a fourth, do not summarize, do "
+        f"not narrate {addressee_name}'s reaction. Long responses break "
+        "the scene's rhythm.\n"
+        "- Each beat is one concrete gesture or sensory image. No "
         "explanation, no inner monologue, no stage-direction adverbs.\n"
         "- Trailing ellipses (...) are fine for emotional weight.\n"
-        "Example: *She turns with soapy hands to cup your face* That's how "
-        "I know it's real... *She kisses you softly* You love me when I'm "
-        "messy... *She rests her forehead against yours* ...and every "
-        "moment in between.\n"
+        "EXAMPLE (3 beats, stops cleanly):\n"
+        "*She turns with soapy hands to cup your face* That's how I know "
+        "it's real... *She kisses you softly* You love me when I'm messy... "
+        "*She smiles tearfully* ...and every moment in between.\n"
         f"Show only what {addressee_name} could externally observe of "
-        f"{speaker_name}; never narrate {addressee_name}'s actions or "
-        "thoughts. One response — leave room to react."
+        f"{speaker_name}; never narrate {addressee_name}'s actions, "
+        "thoughts, or speech. One response — leave room to react."
     )
 
 
diff --git a/chat/web/turns.py b/chat/web/turns.py
index 623390d..9b3076b 100644
--- a/chat/web/turns.py
+++ b/chat/web/turns.py
@@ -67,6 +67,7 @@ from chat.services.multi_state_update import compute_state_updates_for_present
 from chat.services.prompt import (
     assemble_narrative_prompt,
     consume_pending_meanwhile_digests,
+    trim_to_max_beats,
 )
 from chat.services.rewind import compute_rewind_preview, execute_rewind
 from chat.services.scene_close import detect_scene_close
@@ -482,6 +483,11 @@ async def post_turn(
         _in_flight_tasks.pop(chat_id, None)
 
     primary_text = "".join(primary_accumulated)
+    # Belt-and-suspenders: trim to 3 beats max even if the model
+    # ignored the "HARD CAP: 2-3 beats" prompt instruction. Roleplay-
+    # tuned narrators are reliably verbose; a physical max_tokens
+    # truncates mid-word, this trims at a beat boundary.
+    primary_text = trim_to_max_beats(primary_text, max_beats=3)
 
     # 7. Append the assistant_turn with the final text. (See note above on
     # why we skip ``project`` for these transcript-only event kinds.)
@@ -677,6 +683,10 @@ async def post_turn(
                 _in_flight_tasks.pop(chat_id, None)
 
             interjection_text = "".join(interject_accumulated)
+            # Same beat trim as the primary turn, with a tighter cap —
+            # interjections are by definition short, but Cydonia-class
+            # narrators ignore that. 2 beats is plenty for a chime-in.
+            interjection_text = trim_to_max_beats(interjection_text, max_beats=2)
 
             # Capture the event id (T86 follow-up) so the SSE fragment
             # below carries ``id="turn-<n>"`` for in-place swap.
diff --git a/tests/test_prompt.py b/tests/test_prompt.py
index be12271..721923f 100644
--- a/tests/test_prompt.py
+++ b/tests/test_prompt.py
@@ -21,7 +21,11 @@ import chat.state.world  # noqa: F401
 import chat.state.events  # noqa: F401
 import chat.state.threads  # noqa: F401
 from chat.llm.client import Message
-from chat.services.prompt import _witness_role_for, assemble_narrative_prompt
+from chat.services.prompt import (
+    _witness_role_for,
+    assemble_narrative_prompt,
+    trim_to_max_beats,
+)
 
 
 def _seed_basic(conn) -> None:
@@ -569,8 +573,8 @@ def test_tight_budget_drops_guest_activity_bullet_first(tmp_path):
             # (Phase 4.6 narrative-style fix). Budget bumped enough to
             # accommodate the larger MUST floor while still exercising
             # the SHOULD-tier trim path.
-            budget_soft=440,
-            budget_hard=460,
+            budget_soft=480,
+            budget_hard=510,
         )
     body = msgs[0].content
     # Speaker bullet survives (MUST-tier floor).
@@ -758,8 +762,8 @@ def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
             # (Phase 4.6 narrative-style fix). Budget bumped enough to
             # accommodate the larger MUST floor while still exercising
             # the SHOULD-tier trim path.
-            budget_soft=440,
-            budget_hard=460,
+            budget_soft=480,
+            budget_hard=510,
         )
     body = msgs[0].content
     # MUST: speaker identity, edge to addressee, last 4 dialogue turns.
@@ -773,7 +777,7 @@ def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
     # instruction that ships the asterisk-format spec.
     import tiktoken
     enc = tiktoken.get_encoding("cl100k_base")
-    assert len(enc.encode(body)) <= 460
+    assert len(enc.encode(body)) <= 510
 
 
 # ---------------------------------------------------------------------------
@@ -870,3 +874,44 @@ def test_witness_role_for_none_host_returns_host():
     # Sanity check: existing semantics preserved.
     assert _witness_role_for("bot_a", "bot_a") == "host"
     assert _witness_role_for("bot_a", "bot_b") == "guest"
+
+
+# ---------------------------------------------------------------------------
+# trim_to_max_beats — caps verbose narrative output to N beats
+# ---------------------------------------------------------------------------
+
+
+def test_trim_to_max_beats_passthrough_when_under_cap():
+    assert trim_to_max_beats("", 3) == ""  # empty input: nothing to count
+    assert trim_to_max_beats("plain text", 3) == "plain text"  # no asterisks
+    two = "*She nods* okay. *She turns* see you."  # two beats, four asterisks
+    assert trim_to_max_beats(two, 3) == two  # below the three-beat cap
+
+
+def test_trim_to_max_beats_passthrough_at_exactly_cap():
+    three = "*A* one. *B* two. *C* three."  # six asterisks == 2 * max_beats
+    assert trim_to_max_beats(three, 3) == three  # no seventh asterisk, no cut
+
+
+def test_trim_to_max_beats_cuts_at_fourth_beat():
+    """Cydonia-style 4-beat output is cut just before the 4th asterisk
+    action; the space that separated beats 3 and 4 is stripped."""
+    four = "*A* one. *B* two. *C* three. *D* four."  # 8 asterisks, cap is 6
+    assert trim_to_max_beats(four, 3) == "*A* one. *B* two. *C* three."
+
+
+def test_trim_to_max_beats_handles_runaway_six_beats():
+    """Regression for the motivating failure: a verbose narrator rambling
+    for 6 beats when the prompt asked for 2-3; only the first 3 survive."""
+    six = "*A* 1 *B* 2 *C* 3 *D* 4 *E* 5 *F* 6"  # 12 asterisks, cap is 6
+    assert trim_to_max_beats(six, 3) == "*A* 1 *B* 2 *C* 3"
+
+
+def test_trim_to_max_beats_respects_lower_cap():
+    four = "*A* one. *B* two. *C* three. *D* four."  # same input, two caps
+    assert trim_to_max_beats(four, 2) == "*A* one. *B* two."  # cut at 5th "*"
+    assert trim_to_max_beats(four, 1) == "*A* one."  # cut at 3rd "*"
+
+
+def test_trim_to_max_beats_zero_returns_empty():
+    assert trim_to_max_beats("*A* one. *B* two.", 0) == ""  # cap of 0 -> ""