feat: transcript display formatting with markdown and OOC styling

2026-04-26 14:22:43 -04:00
parent 8390703b73
commit 330077afcf
6 changed files with 225 additions and 11 deletions
@@ -75,6 +75,30 @@ code { font-family: ui-monospace, "SF Mono", Menlo, monospace; }
 .drawer-toggle { padding: 4px 10px; border: 1px solid #ccc; background: #fff; color: #1c1c1c; border-radius: 3px; cursor: pointer; }
 .timeline { flex: 1; overflow-y: auto; min-height: 200px; padding: 8px 0; }
 .turn { margin: 12px 0; }
+/* Child combinator on purpose: only the speaker label (a direct child of
+   the turn <div>) is styled. ``**bold**`` in the turn body also renders
+   as <strong>, nested inside <p>, and must stay inline and uncoloured. */
+.turn > strong { display: block; margin-bottom: 4px; }
+.turn p { margin: 0 0 8px; }
+.turn p:last-child { margin-bottom: 0; }
+.turn-you > strong { color: #1a73e8; }
+.turn-bot > strong { color: #1c1c1c; }
+/* ``*action*`` — italic narration. */
+.action { font-style: italic; color: #555; }
+/* ``((ooc))`` — author-to-system aside. Dim, italic, smaller, set off
+   from surrounding prose so it doesn't read as in-fiction speech. */
+.ooc {
+    font-style: italic;
+    font-size: 12px;
+    color: #999;
+    display: inline-block;
+    background: rgba(0, 0, 0, 0.04);
+    padding: 1px 4px;
+    border-radius: 3px;
+}
+.turn blockquote {
+    border-left: 3px solid #ccc;
+    padding-left: 12px;
+    margin: 8px 0;
+    color: #555;
+}
 .turn-input { display: flex; flex-direction: column; gap: 8px; padding-top: 12px; border-top: 1px solid #e5e5e5; }
 .turn-input textarea { padding: 8px; font: inherit; border: 1px solid #ccc; border-radius: 3px; resize: vertical; }
 .drawer { position: fixed; top: 0; right: 0; width: 360px; height: 100vh; background: #fff; border-left: 1px solid #e5e5e5; padding: 16px; overflow-y: auto; z-index: 10; }
@@ -19,7 +19,7 @@
      {% for turn in turns %}
        <div class="turn turn-{{ turn.role }}">
          <strong>{{ turn.speaker }}</strong>
-          <p>{{ turn.text }}</p>
+          {{ turn.text|render_prose|safe }}
        </div>
      {% endfor %}
    {% endif %}
@@ -16,11 +16,17 @@ from fastapi.templating import Jinja2Templates
 from chat.state.entities import get_bot
 from chat.state.world import get_chat
 from chat.web.bots import get_conn
+from chat.web.render import render_prose
 from chat.web.turns import _read_recent_dialogue

 TEMPLATES = Jinja2Templates(
    directory=str(Path(__file__).resolve().parent.parent / "templates")
 )
+# Register the prose renderer as a Jinja filter so the chat-detail
+# template can use ``{{ turn.text|render_prose|safe }}`` (Task 33).
+# The renderer escapes user content internally; ``|safe`` is required
+# because the output contains intentional ``<p>``/``<em>``/etc. tags.
+TEMPLATES.env.filters["render_prose"] = render_prose

 router = APIRouter()

@@ -0,0 +1,106 @@
+"""Transcript display formatting (Task 33, Requirements §16.3).
+
+Bot and user prose is rendered with **lightweight markdown**:
+
+* ``*action*`` → ``<em class="action">…</em>`` — italic narration.
+* ``**bold**`` → ``<strong>…</strong>`` — emphasis.
+* ``((ooc))`` → ``<span class="ooc">((ooc))</span>`` — author-to-system
+  asides; visible to the reader, dimmed/italic in CSS, and stripped from
+  the prompt sent to the bot (see :func:`chat.web.turns._strip_ooc_for_prompt`).
+* ``> line`` → ``<blockquote>line</blockquote>``.
+* Double newline → paragraph break.
+* Everything else is HTML-escaped and wrapped in ``<p>…</p>``.
+
+No headings, code blocks, links, images, or tables — out of scope per
+Requirements §16.3. The renderer is the single source of truth used by
+both the chat-detail GET (initial timeline render, via Jinja filter) and
+the per-turn SSE fragments emitted from :mod:`chat.web.turns`.
+
+Order of operations matters:
+
+1. ``html.escape`` the whole input first — every replacement below assumes
+   user-supplied ``<``/``>``/``&`` are already neutralised, so the wrapper
+   tags we add can never collide with an attacker-controlled tag.
+2. OOC wrap before action/bold so its inner ``*`` are not interpreted.
+3. Bold (``**``) before action (``*``) — the bold pattern is stricter and
+   would otherwise be partially consumed by the action regex.
+4. Blockquote pass over already-escaped lines (so we match ``&gt;``).
+5. Paragraph split on double newline.
+"""
+
+from __future__ import annotations
+
+import html
+import re
+
+# ``((…))`` — non-greedy, allows newlines so a multi-line OOC aside still
+# wraps cleanly (``[^)]`` already matches ``\n``; no ``re.DOTALL`` needed
+# because the pattern contains no ``.``). The inner ``[^)]*?`` keeps it
+# from spanning across a closing-paren boundary.
+_OOC_PATTERN = re.compile(r"\(\([^)]*?\)\)")
+
+# ``**bold**`` — strict: no embedded asterisks or newlines. Must run
+# *before* the single-asterisk action pattern, otherwise ``**x**`` would
+# be partly consumed by ``*…*``.
+_BOLD_PATTERN = re.compile(r"\*\*([^*\n]+)\*\*")
+
+# ``*action*`` — single-asterisk italics; same restriction as bold.
+_ACTION_PATTERN = re.compile(r"\*([^*\n]+)\*")
+
+# ``> line`` at start of a line — note we match the *escaped* form
+# ``&gt;`` because this pass runs after ``html.escape``.
+_BLOCKQUOTE_PATTERN = re.compile(r"^&gt;\s?(.+)$", re.MULTILINE)
+
+# NUL-delimited slot that stands in for a parked OOC aside while the
+# other passes run; the number indexes the list of rendered asides.
+_OOC_SLOT_PATTERN = re.compile(r"\x00(\d+)\x00")
+
+
+def render_prose(text: str) -> str:
+    """Render prose to safe HTML.
+
+    Returns an empty string for empty/whitespace-only input so the caller
+    can append the result without producing stray ``<p></p>`` tags.
+
+    Each OOC aside is swapped for a NUL-delimited slot while the other
+    passes run and is restored last. That keeps ``*`` and ``>`` inside
+    ``((…))`` literal, stops an aside from being split across paragraphs,
+    and guarantees an ``<em>`` can never close inside the OOC ``<span>``.
+
+    Quote lines are emitted as siblings of ``<p>`` rather than inside it:
+    ``<blockquote>`` is not valid content for ``<p>``, and browsers repair
+    that nesting by closing the paragraph early and adding an empty one.
+    """
+    if not text or not text.strip():
+        return ""
+
+    # Normalise CRLF so paragraph splitting on ``\n\n`` works for input
+    # pasted from Windows clients. NUL is dropped because it delimits the
+    # OOC slots below and therefore must not occur in user text.
+    text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\x00", "")
+
+    escaped = html.escape(text)
+
+    # OOC first — park every aside so later passes cannot look inside it.
+    asides = [
+        f'<span class="ooc">{aside}</span>'
+        for aside in _OOC_PATTERN.findall(escaped)
+    ]
+    slots = iter(range(len(asides)))
+    escaped = _OOC_PATTERN.sub(lambda _m: f"\x00{next(slots)}\x00", escaped)
+
+    # Bold strictly before action (regex precedence — see module docstring).
+    escaped = _BOLD_PATTERN.sub(r"<strong>\1</strong>", escaped)
+    escaped = _ACTION_PATTERN.sub(r'<em class="action">\1</em>', escaped)
+
+    # Split on blank lines, then on quote lines. ``split`` with a single
+    # capture group alternates plain text (even index) with quote text
+    # (odd index). Whitespace-only pieces are dropped so that no empty
+    # ``<p></p>`` or ``<blockquote></blockquote>`` is emitted.
+    blocks = []
+    for paragraph in escaped.split("\n\n"):
+        for index, piece in enumerate(_BLOCKQUOTE_PATTERN.split(paragraph)):
+            piece = piece.strip()
+            if piece:
+                tag = "blockquote" if index % 2 else "p"
+                blocks.append(f"<{tag}>{piece}</{tag}>")
+
+    # Restore the parked asides now that every other pass has finished.
+    return _OOC_SLOT_PATTERN.sub(
+        lambda m: asides[int(m.group(1))], "".join(blocks)
+    )
+
+
+def render_turn_html(speaker: str, text: str, role: str = "bot") -> str:
+    """Build the ``<div class="turn turn-{role}">…</div>`` fragment for a turn.
+
+    The fragment is the speaker label in ``<strong>`` followed by the
+    body produced by :func:`render_prose`. The same body renderer backs
+    the ``render_prose`` Jinja filter, so live SSE fragments and the
+    initial chat-detail render share one formatting path.
+
+    ``role`` becomes the CSS class suffix (``turn-you`` / ``turn-bot``).
+    Both ``speaker`` and ``role`` are passed through ``html.escape``
+    before interpolation; ``text`` is escaped inside ``render_prose``.
+    """
+    css_role = html.escape(role)
+    label = html.escape(speaker)
+    pieces = [
+        f'<div class="turn turn-{css_role}">',
+        f"<strong>{label}</strong>",
+        render_prose(text),
+        "</div>",
+    ]
+    return "".join(pieces)
@@ -53,6 +53,7 @@ from chat.state.world import active_scene, get_chat, get_container
 from chat.web.bots import get_conn
 from chat.web.kickoff import get_llm_client
 from chat.web.pubsub import publish
+from chat.web.render import render_turn_html as _render_turn_html

 router = APIRouter()

@@ -102,16 +103,6 @@ def _read_recent_dialogue(conn, chat_id: str, limit: int = 200) -> list[dict]:
    return out


-def _render_turn_html(speaker_label: str, text: str, *, role: str) -> str:
-    """Render a single turn as a small HTML fragment (escaped)."""
-    return (
-        f'<div class="turn turn-{role}">'
-        f"<strong>{html.escape(speaker_label)}</strong>"
-        f"<p>{html.escape(text)}</p>"
-        f"</div>"
-    )
-
-
@router.post("/chats/{chat_id}/turns")
 async def post_turn(
    chat_id: str,
@@ -0,0 +1,87 @@
+"""Tests for the transcript renderer (Task 33).
+
+Lightweight markdown for transcript turns:
+- ``*action*`` → ``<em class="action">action</em>``
+- ``**bold**`` → ``<strong>bold</strong>``
+- ``((ooc))`` → ``<span class="ooc">((ooc))</span>``
+- ``> line`` → ``<blockquote>line</blockquote>``
+- paragraph breaks (double newline) → ``</p><p>``
+- everything HTML-escaped first
+
+No headings, no code blocks, no links — out of scope per Requirements §16.3.
+"""
+
+from __future__ import annotations
+
+from chat.web.render import render_prose, render_turn_html
+
+
+def test_render_prose_escapes_html():
+    """Raw HTML in user content must be escaped — no XSS surface."""
+    out = render_prose("<script>alert(1)</script>")
+    assert "<script>" not in out
+    assert "&lt;script&gt;" in out
+
+
+def test_render_prose_action_to_italic():
+    out = render_prose("*walks over*")
+    assert '<em class="action">walks over</em>' in out
+
+
+def test_render_prose_bold_before_action():
+    """Bold (``**``) must be processed before action (``*``)."""
+    out = render_prose("**emphasis** and *action*")
+    assert "<strong>emphasis</strong>" in out
+    assert '<em class="action">action</em>' in out
+    # Make sure we didn't double-wrap: no stray asterisks left behind.
+    assert "*" not in out
+
+
+def test_render_prose_ooc_wrapped():
+    out = render_prose("((this is OOC))")
+    assert '<span class="ooc">' in out
+    assert "((this is OOC))" in out
+
+
+def test_render_prose_paragraphs():
+    out = render_prose("First.\n\nSecond.")
+    # Two <p> opens and two closes.
+    assert out.count("<p>") == 2
+    assert out.count("</p>") == 2
+    assert "<p>First.</p>" in out
+    assert "<p>Second.</p>" in out
+
+
+def test_render_prose_blockquote():
+    out = render_prose("> a quote")
+    assert "<blockquote>a quote</blockquote>" in out
+
+
+def test_render_prose_empty():
+    """Empty / whitespace-only inputs produce empty output, not stray tags."""
+    assert render_prose("") == ""
+    assert render_prose("   ") == ""
+
+
+def test_render_turn_html_includes_role_class():
+    out = render_turn_html("BotA", "Hello.", role="bot")
+    assert 'class="turn turn-bot"' in out
+    assert "<strong>BotA</strong>" in out
+    assert "Hello." in out
+
+
+def test_render_turn_html_escapes_speaker():
+    """Speaker label is also HTML-escaped — names are user-controlled."""
+    out = render_turn_html("<bad>", "hi", role="you")
+    # Raw tag should not appear; escaped form should.
+    assert "<bad>" not in out
+    assert "&lt;bad&gt;" in out
+
+
+def test_render_prose_mixed_full_message():
+    """Realistic turn with action, dialogue, and an OOC aside."""
+    text = "*looks up* \"You're back late.\" ((she's tired))"
+    out = render_prose(text)
+    assert '<em class="action">looks up</em>' in out
+    # The apostrophe in ``she's`` is HTML-escaped to ``&#x27;``.
+    assert '<span class="ooc">((she&#x27;s tired))</span>' in out