feat: transcript display formatting with markdown and OOC styling

This commit is contained in:
Joseph Doherty
2026-04-26 14:22:43 -04:00
parent 8390703b73
commit 330077afcf
6 changed files with 225 additions and 11 deletions
+24
View File
@@ -75,6 +75,30 @@ code { font-family: ui-monospace, "SF Mono", Menlo, monospace; }
.drawer-toggle { padding: 4px 10px; border: 1px solid #ccc; background: #fff; color: #1c1c1c; border-radius: 3px; cursor: pointer; }
.timeline { flex: 1; overflow-y: auto; min-height: 200px; padding: 8px 0; }
.turn { margin: 12px 0; }
.turn strong { display: block; margin-bottom: 4px; }
.turn p { margin: 0 0 8px; }
.turn p:last-child { margin-bottom: 0; }
.turn-you strong { color: #1a73e8; }
.turn-bot strong { color: #1c1c1c; }
/* ``*action*`` — italic narration. Applied to the ``<em class="action">``
   element the prose renderer emits for single-asterisk spans. */
.action { font-style: italic; color: #555; }
/* ``((ooc))`` — author-to-system aside, emitted by the prose renderer as
   ``<span class="ooc">``. Dim, italic, smaller, and given a faint
   background chip so it is set off from surrounding prose and doesn't
   read as in-fiction speech. */
.ooc {
font-style: italic;
font-size: 12px;
color: #999;
display: inline-block;
background: rgba(0, 0, 0, 0.04);
padding: 1px 4px;
border-radius: 3px;
}
/* ``> line`` — quoted line inside a turn, emitted by the prose renderer as
   ``<blockquote>``. A left rule plus muted text marks it as quoted. */
.turn blockquote {
border-left: 3px solid #ccc;
padding-left: 12px;
margin: 8px 0;
color: #555;
}
.turn-input { display: flex; flex-direction: column; gap: 8px; padding-top: 12px; border-top: 1px solid #e5e5e5; }
.turn-input textarea { padding: 8px; font: inherit; border: 1px solid #ccc; border-radius: 3px; resize: vertical; }
.drawer { position: fixed; top: 0; right: 0; width: 360px; height: 100vh; background: #fff; border-left: 1px solid #e5e5e5; padding: 16px; overflow-y: auto; z-index: 10; }
+1 -1
View File
@@ -19,7 +19,7 @@
{% for turn in turns %}
<div class="turn turn-{{ turn.role }}">
<strong>{{ turn.speaker }}</strong>
<p>{{ turn.text }}</p>
{{ turn.text|render_prose|safe }}
</div>
{% endfor %}
{% endif %}
+6
View File
@@ -16,11 +16,17 @@ from fastapi.templating import Jinja2Templates
from chat.state.entities import get_bot
from chat.state.world import get_chat
from chat.web.bots import get_conn
from chat.web.render import render_prose
from chat.web.turns import _read_recent_dialogue
# Jinja2 template loader for this module. Templates are read from the
# ``templates`` directory that sits in the parent of this file's own
# directory (path resolved so symlinks don't change the lookup root).
TEMPLATES = Jinja2Templates(
directory=str(Path(__file__).resolve().parent.parent / "templates")
)
# Register the prose renderer as a Jinja filter so the chat-detail
# template can use ``{{ turn.text|render_prose|safe }}`` (Task 33).
# The renderer escapes user content internally; ``|safe`` is required
# because the output contains intentional ``<p>``/``<em>``/etc. tags.
# NOTE(review): ``|safe`` disables autoescaping for the filter's output, so
# the safety of the page rests entirely on ``render_prose`` escaping first.
TEMPLATES.env.filters["render_prose"] = render_prose
# Router object for this module; the routes attached to it are outside
# this view.
router = APIRouter()
+106
View File
@@ -0,0 +1,106 @@
"""Transcript display formatting (Task 33, Requirements §16.3).
Bot and user prose is rendered with **lightweight markdown**:
* ``*action*`` → ``<em class="action">…</em>`` — italic narration.
* ``**bold**`` → ``<strong>…</strong>`` — emphasis.
* ``((ooc))`` → ``<span class="ooc">((ooc))</span>`` — author-to-system
asides; visible to the reader, dimmed/italic in CSS, and stripped from
the prompt sent to the bot (see :func:`chat.web.turns._strip_ooc_for_prompt`).
* ``> line`` → ``<blockquote>line</blockquote>``.
* Double newline → paragraph break.
* Everything else is HTML-escaped and wrapped in ``<p>…</p>``.
No headings, code blocks, links, images, or tables — out of scope per
Requirements §16.3. The renderer is the single source of truth used by
both the chat-detail GET (initial timeline render, via Jinja filter) and
the per-turn SSE fragments emitted from :mod:`chat.web.turns`.
Order of operations matters:
1. ``html.escape`` the whole input first — every replacement below assumes
user-supplied ``<``/``>``/``&`` are already neutralised, so the wrapper
tags we add can never collide with an attacker-controlled tag.
2. OOC wrap before action/bold so its inner ``*`` are not interpreted.
3. Bold (``**``) before action (``*``) — the bold pattern is stricter and
would otherwise be partially consumed by the action regex.
4. Blockquote pass over already-escaped lines (so we match ``&gt;``).
5. Paragraph split on double newline.
"""
from __future__ import annotations
import html
import re
# ``((…))`` — non-greedy. The negated class ``[^)]`` matches newlines too,
# so a multi-line OOC aside still wraps cleanly, while keeping the match
# from spanning across a closing-paren boundary.
_OOC_PATTERN = re.compile(r"\(\([^)]*?\)\)", re.DOTALL)

# ``**bold**`` — strict: no embedded asterisks or newlines. Must run
# *before* the single-asterisk action pattern, otherwise ``**x**`` would
# be partly consumed by ``*…*``.
_BOLD_PATTERN = re.compile(r"\*\*([^*\n]+)\*\*")

# ``*action*`` — single-asterisk italics; same restriction as bold.
_ACTION_PATTERN = re.compile(r"\*([^*\n]+)\*")

# ``> line`` at start of a line — note we match the *escaped* form
# ``&gt;`` because this pass runs after ``html.escape``. It is applied to
# one line at a time so the optional ``\s`` can never swallow a newline.
_BLOCKQUOTE_PATTERN = re.compile(r"^&gt;\s?(.+)$", re.MULTILINE)

# Paragraph separator: a blank line, which may hold stray whitespace.
_PARAGRAPH_BREAK = re.compile(r"\n\s*\n")

# Stand-in for a stashed OOC span. A raw ``<`` cannot occur in escaped
# input, so this token can never collide with user text.
_OOC_TOKEN = re.compile(r"<ooc:(\d+)>")


def _render_block(block: str) -> str:
    """Render one blank-line-delimited chunk of escaped, inline-marked text.

    Lines starting with ``&gt;`` each become their own ``<blockquote>``;
    every run of other lines becomes one ``<p>``. Blockquotes are emitted
    as *siblings* of paragraphs, never inside them, because ``<p>`` may not
    contain block-level elements.
    """
    pieces: list[str] = []
    prose_lines: list[str] = []

    def _flush_prose() -> None:
        # Close the pending run of ordinary lines, skipping empty runs so
        # no stray ``<p></p>`` is produced.
        paragraph = "\n".join(prose_lines).strip()
        prose_lines.clear()
        if paragraph:
            pieces.append(f"<p>{paragraph}</p>")

    for line in block.split("\n"):
        quote = _BLOCKQUOTE_PATTERN.match(line)
        if quote is None:
            prose_lines.append(line)
            continue
        _flush_prose()
        pieces.append(f"<blockquote>{quote.group(1)}</blockquote>")
    _flush_prose()
    return "".join(pieces)


def render_prose(text: str) -> str:
    """Render prose to safe HTML.

    The input is HTML-escaped first; the lightweight markup (``((ooc))``,
    ``**bold**``, ``*action*``, ``> quote``, blank-line paragraphs) is then
    applied to the escaped text, so the only tags in the result are the
    ones added here.

    Args:
        text: Raw turn text. ``None``-like/empty values are tolerated.

    Returns:
        A string of sibling ``<p>`` and ``<blockquote>`` elements, or an
        empty string for empty/whitespace-only input so the caller can
        append the result without producing stray ``<p></p>`` tags.
    """
    if not text or not text.strip():
        return ""

    # Normalise CRLF so paragraph splitting works for input pasted from
    # Windows clients.
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    escaped = html.escape(text)

    # Stash each OOC aside behind an opaque token. This keeps ``*`` inside
    # the aside from being read as bold/action markup (which could also
    # produce mis-nested tags across the span boundary) and keeps a blank
    # line inside the aside from splitting the span across paragraphs.
    ooc_spans: list[str] = []

    def _stash_ooc(match: "re.Match[str]") -> str:
        ooc_spans.append(f'<span class="ooc">{match.group(0)}</span>')
        return f"<ooc:{len(ooc_spans) - 1}>"

    escaped = _OOC_PATTERN.sub(_stash_ooc, escaped)

    # Bold strictly before action: ``**x**`` would otherwise be partly
    # consumed by the single-asterisk pattern.
    escaped = _BOLD_PATTERN.sub(r"<strong>\1</strong>", escaped)
    escaped = _ACTION_PATTERN.sub(r'<em class="action">\1</em>', escaped)

    rendered = "".join(
        _render_block(block) for block in _PARAGRAPH_BREAK.split(escaped)
    )

    # Put the OOC spans back now that no later pass can touch their content.
    return _OOC_TOKEN.sub(lambda m: ooc_spans[int(m.group(1))], rendered)
def render_turn_html(speaker: str, text: str, role: str = "bot") -> str:
    """Build the ``<div class="turn turn-ROLE">…</div>`` fragment for one turn.

    The fragment holds the speaker label in a ``<strong>`` followed by the
    body produced by :func:`render_prose`. The module uses it for the
    per-turn SSE fragments published from :mod:`chat.web.turns`; the
    chat-detail template reaches the same body markup through the
    ``render_prose`` filter.

    Args:
        speaker: Label shown above the turn; HTML-escaped here.
        text: Raw turn text, rendered (and escaped) by ``render_prose``.
        role: Suffix of the ``turn-…`` CSS class (``you`` / ``bot``);
            HTML-escaped here as a defensive measure.

    Returns:
        The complete HTML fragment as a single string.
    """
    label = html.escape(speaker)
    css_role = html.escape(role)
    fragments = (
        '<div class="turn turn-', css_role, '">',
        "<strong>", label, "</strong>",
        render_prose(text),
        "</div>",
    )
    return "".join(fragments)
+1 -10
View File
@@ -53,6 +53,7 @@ from chat.state.world import active_scene, get_chat, get_container
from chat.web.bots import get_conn
from chat.web.kickoff import get_llm_client
from chat.web.pubsub import publish
from chat.web.render import render_turn_html as _render_turn_html
router = APIRouter()
@@ -102,16 +103,6 @@ def _read_recent_dialogue(conn, chat_id: str, limit: int = 200) -> list[dict]:
return out
def _render_turn_html(speaker_label: str, text: str, *, role: str) -> str:
    """Render a single turn as a small, fully escaped HTML fragment.

    Args:
        speaker_label: Name shown in the ``<strong>`` header.
        text: Turn body, emitted as one escaped ``<p>``.
        role: Suffix of the ``turn-…`` CSS class.

    Returns:
        ``<div class="turn turn-ROLE"><strong>…</strong><p>…</p></div>``.

    All three values are passed through ``html.escape``. ``role`` lands
    inside a quoted attribute, so leaving it raw would let a stray ``"``
    break out of the ``class`` attribute.
    """
    return (
        f'<div class="turn turn-{html.escape(role)}">'
        f"<strong>{html.escape(speaker_label)}</strong>"
        f"<p>{html.escape(text)}</p>"
        f"</div>"
    )
@router.post("/chats/{chat_id}/turns")
async def post_turn(
chat_id: str,
+87
View File
@@ -0,0 +1,87 @@
"""Tests for the transcript renderer (Task 33).
Lightweight markdown for transcript turns:
- ``*action*`` → ``<em class="action">action</em>``
- ``**bold**`` → ``<strong>bold</strong>``
- ``((ooc))`` → ``<span class="ooc">((ooc))</span>``
- ``> line`` → ``<blockquote>line</blockquote>``
- paragraph breaks (double newline) → ``</p><p>``
- everything HTML-escaped first
No headings, no code blocks, no links — out of scope per Requirements §16.3.
"""
from __future__ import annotations
from chat.web.render import render_prose, render_turn_html
def test_render_prose_escapes_html():
    """Markup typed by a user comes back entity-encoded, never as live tags."""
    rendered = render_prose("<script>alert(1)</script>")
    assert "<script>" not in rendered
    assert "&lt;script&gt;" in rendered
def test_render_prose_action_to_italic():
    """A ``*…*`` span becomes an ``<em>`` carrying the ``action`` class."""
    rendered = render_prose("*walks over*")
    expected = '<em class="action">walks over</em>'
    assert expected in rendered
def test_render_prose_bold_before_action():
    """``**`` is resolved ahead of ``*`` so both forms render in one message."""
    rendered = render_prose("**emphasis** and *action*")
    assert "<strong>emphasis</strong>" in rendered
    assert '<em class="action">action</em>' in rendered
    # Every asterisk must have been consumed by one of the two patterns;
    # a leftover would indicate a partial or double wrap.
    assert "*" not in rendered
def test_render_prose_ooc_wrapped():
    """An OOC aside is wrapped in the ``ooc`` span with its parens kept."""
    rendered = render_prose("((this is OOC))")
    for needle in ('<span class="ooc">', "((this is OOC))"):
        assert needle in rendered
def test_render_prose_paragraphs():
    """A double newline yields exactly two separately wrapped paragraphs."""
    rendered = render_prose("First.\n\nSecond.")
    # One opening and one closing tag per paragraph.
    assert rendered.count("<p>") == 2
    assert rendered.count("</p>") == 2
    for paragraph in ("<p>First.</p>", "<p>Second.</p>"):
        assert paragraph in rendered
def test_render_prose_blockquote():
    """A line introduced by ``> `` is emitted as a ``<blockquote>``."""
    rendered = render_prose("> a quote")
    expected = "<blockquote>a quote</blockquote>"
    assert expected in rendered
def test_render_prose_empty():
    """Blank input (empty or only whitespace) renders to the empty string."""
    for blank in ("", " "):
        assert render_prose(blank) == ""
def test_render_turn_html_includes_role_class():
    """The wrapper carries ``turn-<role>`` and holds the label and the body."""
    fragment = render_turn_html("BotA", "Hello.", role="bot")
    for needle in ('class="turn turn-bot"', "<strong>BotA</strong>", "Hello."):
        assert needle in fragment
def test_render_turn_html_escapes_speaker():
    """A speaker name containing markup is entity-encoded in the fragment."""
    fragment = render_turn_html("<bad>", "hi", role="you")
    # The literal tag must be gone and its escaped spelling present.
    assert "<bad>" not in fragment
    assert "&lt;bad&gt;" in fragment
def test_render_prose_mixed_full_message():
    """A realistic turn mixing narration, quoted speech and an OOC aside."""
    message = "*looks up* \"You're back late.\" ((she's tired))"
    rendered = render_prose(message)
    assert '<em class="action">looks up</em>' in rendered
    # ``html.escape`` turns the apostrophe in ``she's`` into ``&#x27;``,
    # and that escaped form is what ends up inside the OOC span.
    assert '<span class="ooc">((she&#x27;s tired))</span>' in rendered