Files
chat/chat/web/render.py
T

107 lines
4.3 KiB
Python

"""Transcript display formatting (Task 33, Requirements §16.3).
Bot and user prose is rendered with **lightweight markdown**:
* ``*action*`` → ``<em class="action">…</em>`` — italic narration.
* ``**bold**`` → ``<strong>…</strong>`` — emphasis.
* ``((ooc))`` → ``<span class="ooc">((ooc))</span>`` — author-to-system
asides; visible to the reader, dimmed/italic in CSS, and stripped from
the prompt sent to the bot (see :func:`chat.web.turns._strip_ooc_for_prompt`).
* ``> line`` → ``<blockquote>line</blockquote>``.
* Double newline → paragraph break.
* Everything else is HTML-escaped and wrapped in ``<p>…</p>``.
No headings, code blocks, links, images, or tables — out of scope per
Requirements §16.3. The renderer is the single source of truth used by
both the chat-detail GET (initial timeline render, via Jinja filter) and
the per-turn SSE fragments emitted from :mod:`chat.web.turns`.
Order of operations matters:
1. ``html.escape`` the whole input first — every replacement below assumes
user-supplied ``<``/``>``/``&`` are already neutralised, so the wrapper
tags we add can never collide with an attacker-controlled tag.
2. OOC wrap before action/bold so its inner ``*`` are not interpreted.
3. Bold (``**``) before action (``*``) — the bold pattern is stricter and
would otherwise be partially consumed by the action regex.
4. Blockquote pass over already-escaped lines (so we match ``&gt;``).
5. Paragraph split on double newline.
"""
from __future__ import annotations
import html
import re
# ``((…))`` — out-of-character aside. The negated class ``[^)]`` already
# matches newlines, so a multi-line aside wraps cleanly; ``re.DOTALL`` is
# kept for compatibility but has no effect (the pattern contains no ``.``).
# Because ``)`` is excluded from the body, a match can never run past the
# first closing paren.
_OOC_PATTERN = re.compile(r"\(\([^)]*?\)\)", re.DOTALL)

# ``**bold**`` — strict: no embedded asterisks or newlines. Must run
# *before* the single-asterisk action pattern, otherwise ``**x**`` would
# be partly consumed by ``*…*``.
_BOLD_PATTERN = re.compile(r"\*\*([^*\n]+)\*\*")

# ``*action*`` — single-asterisk italics; same restriction as bold.
_ACTION_PATTERN = re.compile(r"\*([^*\n]+)\*")

# ``> line`` at start of a line — we match the *escaped* form ``&gt;``
# because this pass runs after ``html.escape``. The optional separator is
# ``[^\S\n]`` (whitespace other than a newline): plain ``\s`` would let a
# bare ``>`` line swallow the line break and quote the *following* line.
_BLOCKQUOTE_PATTERN = re.compile(r"^&gt;[^\S\n]?(.+)$", re.MULTILINE)


def _wrap_ooc(match: "re.Match[str]") -> str:
    """Wrap one ``((…))`` match in its span, shielding inner asterisks.

    Every ``*`` inside the aside is rewritten to the numeric character
    reference ``&#42;``. A browser renders that as ``*``, but the bold and
    action regexes (which look for a literal ``*``) no longer see it, so
    they can neither style the aside's contents nor open a tag inside the
    span and close it outside.
    """
    shielded = match.group(0).replace("*", "&#42;")
    return f'<span class="ooc">{shielded}</span>'


def render_prose(text: str) -> str:
    """Render prose to safe HTML.

    The input is HTML-escaped first, then the lightweight markup is
    applied in a fixed order: OOC asides, bold, action, blockquote, and
    finally paragraph splitting on blank lines. Single newlines inside a
    paragraph are left as-is.

    Args:
        text: Raw bot or user prose.

    Returns:
        A string of ``<p>…</p>`` blocks, or an empty string for
        empty/whitespace-only input so the caller can append the result
        without producing stray ``<p></p>`` tags.
    """
    if not text or not text.strip():
        return ""

    # Normalise CRLF / lone CR so paragraph splitting on ``\n\n`` works for
    # input pasted from Windows clients.
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Escape before any markup is added: after this point the only raw
    # ``<``/``>`` characters in the string are the ones inserted below.
    escaped = html.escape(text)

    # OOC first — asterisks inside the aside are shielded (see _wrap_ooc)
    # so the following passes leave its contents alone.
    escaped = _OOC_PATTERN.sub(_wrap_ooc, escaped)

    # Bold strictly before action, otherwise ``**x**`` would be partly
    # consumed by the single-asterisk pattern.
    escaped = _BOLD_PATTERN.sub(r"<strong>\1</strong>", escaped)
    escaped = _ACTION_PATTERN.sub(r'<em class="action">\1</em>', escaped)

    # Blockquote on already-escaped ``&gt;`` markers.
    escaped = _BLOCKQUOTE_PATTERN.sub(r"<blockquote>\1</blockquote>", escaped)

    # Paragraph splitting — drop empty fragments so a trailing ``\n\n``
    # doesn't yield an empty ``<p></p>`` block.
    paragraphs = [p.strip() for p in escaped.split("\n\n") if p.strip()]
    return "".join(f"<p>{p}</p>" for p in paragraphs)
def render_turn_html(speaker: str, text: str, role: str = "bot") -> str:
    """Build the ``<div class="turn turn-{role}">…</div>`` markup for one turn.

    The div contains the speaker label in ``<strong>`` followed by the
    prose body produced by :func:`render_prose`. According to the module
    notes this is the markup shared by the SSE fragment publisher in
    :mod:`chat.web.turns` and the chat-detail template.

    Args:
        speaker: Display name of whoever produced the turn.
        text: Raw prose for the turn; passed to :func:`render_prose`.
        role: Suffix for the ``turn-…`` CSS class (e.g. ``you`` or
            ``bot``). Defaults to ``"bot"``.

    Returns:
        The HTML fragment for the turn. ``speaker`` and ``role`` are
        HTML-escaped defensively before being interpolated.
    """
    safe_speaker = html.escape(speaker)
    safe_role = html.escape(role)
    pieces = [
        f'<div class="turn turn-{safe_role}">',
        f"<strong>{safe_speaker}</strong>",
        render_prose(text),
        "</div>",
    ]
    return "".join(pieces)