From 330077afcf83af23f18b670f63a5c472041ba4a9 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 14:22:43 -0400 Subject: [PATCH] feat: transcript display formatting with markdown and OOC styling --- chat/static/app.css | 24 +++++++++ chat/templates/chat.html | 2 +- chat/web/chat.py | 6 +++ chat/web/render.py | 106 +++++++++++++++++++++++++++++++++++++++ chat/web/turns.py | 11 +--- tests/test_render.py | 87 ++++++++++++++++++++++++++++++++ 6 files changed, 225 insertions(+), 11 deletions(-) create mode 100644 chat/web/render.py create mode 100644 tests/test_render.py diff --git a/chat/static/app.css b/chat/static/app.css index 915aa1c..86d8b72 100644 --- a/chat/static/app.css +++ b/chat/static/app.css @@ -75,6 +75,30 @@ code { font-family: ui-monospace, "SF Mono", Menlo, monospace; } .drawer-toggle { padding: 4px 10px; border: 1px solid #ccc; background: #fff; color: #1c1c1c; border-radius: 3px; cursor: pointer; } .timeline { flex: 1; overflow-y: auto; min-height: 200px; padding: 8px 0; } .turn { margin: 12px 0; } +.turn strong { display: block; margin-bottom: 4px; } +.turn p { margin: 0 0 8px; } +.turn p:last-child { margin-bottom: 0; } +.turn-you strong { color: #1a73e8; } +.turn-bot strong { color: #1c1c1c; } +/* ``*action*`` — italic narration. */ +.action { font-style: italic; color: #555; } +/* ``((ooc))`` — author-to-system aside. Dim, italic, smaller, set off + from surrounding prose so it doesn't read as in-fiction speech. 
*/ +.ooc { + font-style: italic; + font-size: 12px; + color: #999; + display: inline-block; + background: rgba(0, 0, 0, 0.04); + padding: 1px 4px; + border-radius: 3px; +} +.turn blockquote { + border-left: 3px solid #ccc; + padding-left: 12px; + margin: 8px 0; + color: #555; +} .turn-input { display: flex; flex-direction: column; gap: 8px; padding-top: 12px; border-top: 1px solid #e5e5e5; } .turn-input textarea { padding: 8px; font: inherit; border: 1px solid #ccc; border-radius: 3px; resize: vertical; } .drawer { position: fixed; top: 0; right: 0; width: 360px; height: 100vh; background: #fff; border-left: 1px solid #e5e5e5; padding: 16px; overflow-y: auto; z-index: 10; } diff --git a/chat/templates/chat.html b/chat/templates/chat.html index 0e45b6b..0cadfd6 100644 --- a/chat/templates/chat.html +++ b/chat/templates/chat.html @@ -19,7 +19,7 @@ {% for turn in turns %}
{{ turn.speaker }} -

{{ turn.text }}

+ {{ turn.text|render_prose|safe }}
{% endfor %} {% endif %} diff --git a/chat/web/chat.py b/chat/web/chat.py index b90b74c..0486d8e 100644 --- a/chat/web/chat.py +++ b/chat/web/chat.py @@ -16,11 +16,17 @@ from fastapi.templating import Jinja2Templates from chat.state.entities import get_bot from chat.state.world import get_chat from chat.web.bots import get_conn +from chat.web.render import render_prose from chat.web.turns import _read_recent_dialogue TEMPLATES = Jinja2Templates( directory=str(Path(__file__).resolve().parent.parent / "templates") ) +# Register the prose renderer as a Jinja filter so the chat-detail +# template can use ``{{ turn.text|render_prose|safe }}`` (Task 33). +# The renderer escapes user content internally; ``|safe`` is required +# because the output contains intentional ``

``/````/etc. tags. +TEMPLATES.env.filters["render_prose"] = render_prose router = APIRouter() diff --git a/chat/web/render.py b/chat/web/render.py new file mode 100644 index 0000000..6a2a286 --- /dev/null +++ b/chat/web/render.py @@ -0,0 +1,106 @@ +"""Transcript display formatting (Task 33, Requirements §16.3). + +Bot and user prose is rendered with **lightweight markdown**: + +* ``*action*`` → ```` — italic narration. +* ``**bold**`` → ```` — emphasis. +* ``((ooc))`` → ``((ooc))`` — author-to-system + asides; visible to the reader, dimmed/italic in CSS, and stripped from + the prompt sent to the bot (see :func:`chat.web.turns._strip_ooc_for_prompt`). +* ``> line`` → ``

<blockquote>line</blockquote>
``. +* Double newline → paragraph break. +* Everything else is HTML-escaped and wrapped in ``

``. + +No headings, code blocks, links, images, or tables — out of scope per +Requirements §16.3. The renderer is the single source of truth used by +both the chat-detail GET (initial timeline render, via Jinja filter) and +the per-turn SSE fragments emitted from :mod:`chat.web.turns`. + +Order of operations matters: + +1. ``html.escape`` the whole input first — every replacement below assumes + user-supplied ``<``/``>``/``&`` are already neutralised, so the wrapper + tags we add can never collide with an attacker-controlled tag. +2. OOC wrap before action/bold so its inner ``*`` are not interpreted. +3. Bold (``**``) before action (``*``) — the bold pattern is stricter and + would otherwise be partially consumed by the action regex. +4. Blockquote pass over already-escaped lines (so we match ``>``). +5. Paragraph split on double newline. +""" + +from __future__ import annotations + +import html +import re + +# ``((…))`` — non-greedy, allows newlines so a multi-line OOC aside still +# wraps cleanly. The inner ``[^)]*?`` keeps it from spanning across a +# closing-paren boundary. +_OOC_PATTERN = re.compile(r"\(\([^)]*?\)\)", re.DOTALL) + +# ``**bold**`` — strict: no embedded asterisks or newlines. Must run +# *before* the single-asterisk action pattern, otherwise ``**x**`` would +# be partly consumed by ``*…*``. +_BOLD_PATTERN = re.compile(r"\*\*([^*\n]+)\*\*") + +# ``*action*`` — single-asterisk italics; same restriction as bold. +_ACTION_PATTERN = re.compile(r"\*([^*\n]+)\*") + +# ``> line`` at start of a line — note we match the *escaped* form +# ``>`` because this pass runs after ``html.escape``. +_BLOCKQUOTE_PATTERN = re.compile(r"^>\s?(.+)$", re.MULTILINE) + + +def render_prose(text: str) -> str: + """Render prose to safe HTML. + + Returns an empty string for empty/whitespace-only input so the caller + can append the result without producing stray ``

`` tags. + """ + if not text or not text.strip(): + return "" + + # Normalise CRLF so paragraph splitting on ``\n\n`` works for input + # pasted from Windows clients. + text = text.replace("\r\n", "\n").replace("\r", "\n") + + escaped = html.escape(text) + + # OOC first — the wrapped span survives subsequent passes. + escaped = _OOC_PATTERN.sub( + lambda m: f'{m.group(0)}', escaped + ) + + # Bold strictly before action (regex precedence — see module docstring). + escaped = _BOLD_PATTERN.sub(r"\1", escaped) + escaped = _ACTION_PATTERN.sub(r'\1', escaped) + + # Blockquote on already-escaped ``>`` markers. + escaped = _BLOCKQUOTE_PATTERN.sub(r"
\1
", escaped) + + # Paragraph splitting — drop empty fragments so a trailing ``\n\n`` + # doesn't yield an empty ``

`` block. + paragraphs = [p.strip() for p in escaped.split("\n\n") if p.strip()] + return "".join(f"

{p}

" for p in paragraphs) + + +def render_turn_html(speaker: str, text: str, role: str = "bot") -> str: + """Render a full transcript turn as ``
``. + + Used by both the SSE fragment publisher in :mod:`chat.web.turns` + (per-turn live updates) and indirectly by the chat-detail Jinja + template (initial render, via the ``render_prose`` filter). + + ``role`` selects the CSS class (``turn-you`` vs ``turn-bot``); the + speaker label and role name are HTML-escaped defensively even though + they currently come from trusted server-side state. + """ + speaker_html = html.escape(speaker) + role_html = html.escape(role) + body_html = render_prose(text) + return ( + f'
' + f"{speaker_html}" + f"{body_html}" + f"
" + ) diff --git a/chat/web/turns.py b/chat/web/turns.py index cd264ec..4105704 100644 --- a/chat/web/turns.py +++ b/chat/web/turns.py @@ -53,6 +53,7 @@ from chat.state.world import active_scene, get_chat, get_container from chat.web.bots import get_conn from chat.web.kickoff import get_llm_client from chat.web.pubsub import publish +from chat.web.render import render_turn_html as _render_turn_html router = APIRouter() @@ -102,16 +103,6 @@ def _read_recent_dialogue(conn, chat_id: str, limit: int = 200) -> list[dict]: return out -def _render_turn_html(speaker_label: str, text: str, *, role: str) -> str: - """Render a single turn as a small HTML fragment (escaped).""" - return ( - f'
' - f"{html.escape(speaker_label)}" - f"

{html.escape(text)}

" - f"
" - ) - - @router.post("/chats/{chat_id}/turns") async def post_turn( chat_id: str, diff --git a/tests/test_render.py b/tests/test_render.py new file mode 100644 index 0000000..23c263b --- /dev/null +++ b/tests/test_render.py @@ -0,0 +1,87 @@ +"""Tests for the transcript renderer (Task 33). + +Lightweight markdown for transcript turns: +- ``*action*`` → ``action`` +- ``**bold**`` → ``bold`` +- ``((ooc))`` → ``((ooc))`` +- ``> line`` → ``
<blockquote>line</blockquote>
`` +- paragraph breaks (double newline) → ``

`` +- everything HTML-escaped first + +No headings, no code blocks, no links — out of scope per Requirements §16.3. +""" + +from __future__ import annotations + +from chat.web.render import render_prose, render_turn_html + + +def test_render_prose_escapes_html(): + """Raw HTML in user content must be escaped — no XSS surface.""" + out = render_prose("") + assert "