feat: transcript display formatting with markdown and OOC styling
This commit is contained in:
@@ -75,6 +75,30 @@ code { font-family: ui-monospace, "SF Mono", Menlo, monospace; }
|
||||
/* Button that opens/closes the side drawer. */
.drawer-toggle {
  padding: 4px 10px;
  border: 1px solid #ccc;
  background: #fff;
  color: #1c1c1c;
  border-radius: 3px;
  cursor: pointer;
}

/* Scrollable transcript column; grows to fill the remaining flex space. */
.timeline {
  flex: 1;
  overflow-y: auto;
  min-height: 200px;
  padding: 8px 0;
}
|
||||
/* One transcript turn: speaker label followed by rendered prose. */
.turn {
  margin: 12px 0;
}

/* The speaker label is the <strong> sitting directly inside .turn.  The
   child combinator keeps these rules off the inline <strong> that the
   prose renderer emits for **bold**, which lives inside a <p> and must
   stay inline and keep the body text colour. */
.turn > strong {
  display: block;
  margin-bottom: 4px;
}

.turn p {
  margin: 0 0 8px;
}

.turn p:last-child {
  margin-bottom: 0;
}

.turn-you > strong {
  color: #1a73e8;
}

.turn-bot > strong {
  color: #1c1c1c;
}
|
||||
/* ``*action*`` is rendered as italic narration. */
.action {
  font-style: italic;
  color: #555;
}

/* ``((ooc))`` is an author-to-system aside.  It is dimmed, italic and
   smaller, with a faint chip background, so it does not read as
   in-fiction speech. */
.ooc {
  font-style: italic;
  font-size: 12px;
  color: #999;
  display: inline-block;
  background: rgba(0, 0, 0, 0.04);
  padding: 1px 4px;
  border-radius: 3px;
}

/* Quoted lines inside a turn: left rule plus muted text. */
.turn blockquote {
  border-left: 3px solid #ccc;
  padding-left: 12px;
  margin: 8px 0;
  color: #555;
}

/* Composer area below the timeline. */
.turn-input {
  display: flex;
  flex-direction: column;
  gap: 8px;
  padding-top: 12px;
  border-top: 1px solid #e5e5e5;
}

.turn-input textarea {
  padding: 8px;
  font: inherit;
  border: 1px solid #ccc;
  border-radius: 3px;
  resize: vertical;
}

/* Fixed panel pinned to the right edge of the viewport. */
.drawer {
  position: fixed;
  top: 0;
  right: 0;
  width: 360px;
  height: 100vh;
  background: #fff;
  border-left: 1px solid #e5e5e5;
  padding: 16px;
  overflow-y: auto;
  z-index: 10;
}
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
{% for turn in turns %}
<div class="turn turn-{{ turn.role }}">
  <strong>{{ turn.speaker }}</strong>
  {# The turn body is emitted exactly once, through the prose renderer.
     render_prose escapes the text itself and returns ready-made block
     markup, which is why the result is marked |safe. #}
  {{ turn.text|render_prose|safe }}
</div>
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
@@ -16,11 +16,17 @@ from fastapi.templating import Jinja2Templates
|
||||
from chat.state.entities import get_bot
|
||||
from chat.state.world import get_chat
|
||||
from chat.web.bots import get_conn
|
||||
from chat.web.render import render_prose
|
||||
from chat.web.turns import _read_recent_dialogue
|
||||
|
||||
# Template lookup root: the ``templates`` directory that sits two levels
# above this module's resolved location.
TEMPLATES = Jinja2Templates(
    directory=str(Path(__file__).resolve().parent.parent.joinpath("templates"))
)

# Expose the prose renderer to templates as the ``render_prose`` filter so
# the chat-detail template can write ``{{ turn.text|render_prose|safe }}``
# (Task 33).  ``|safe`` is needed there because the renderer escapes user
# content itself and returns intentional ``<p>``/``<em>``/etc. markup.
TEMPLATES.env.filters.update(render_prose=render_prose)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Transcript display formatting (Task 33, Requirements §16.3).
|
||||
|
||||
Bot and user prose is rendered with **lightweight markdown**:
|
||||
|
||||
* ``*action*`` → ``<em class="action">…</em>`` — italic narration.
|
||||
* ``**bold**`` → ``<strong>…</strong>`` — emphasis.
|
||||
* ``((ooc))`` → ``<span class="ooc">((ooc))</span>`` — author-to-system
|
||||
asides; visible to the reader, dimmed/italic in CSS, and stripped from
|
||||
the prompt sent to the bot (see :func:`chat.web.turns._strip_ooc_for_prompt`).
|
||||
* ``> line`` → ``<blockquote>line</blockquote>``.
|
||||
* Double newline → paragraph break.
|
||||
* Everything else is HTML-escaped and wrapped in ``<p>…</p>``.
|
||||
|
||||
No headings, code blocks, links, images, or tables — out of scope per
|
||||
Requirements §16.3. The renderer is the single source of truth used by
|
||||
both the chat-detail GET (initial timeline render, via Jinja filter) and
|
||||
the per-turn SSE fragments emitted from :mod:`chat.web.turns`.
|
||||
|
||||
Order of operations matters:
|
||||
|
||||
1. ``html.escape`` the whole input first — every replacement below assumes
|
||||
user-supplied ``<``/``>``/``&`` are already neutralised, so the wrapper
|
||||
tags we add can never collide with an attacker-controlled tag.
|
||||
2. OOC wrap before action/bold so its inner ``*`` are not interpreted.
|
||||
3. Bold (``**``) before action (``*``) — the bold pattern is stricter and
|
||||
would otherwise be partially consumed by the action regex.
|
||||
4. Blockquote pass over already-escaped lines (so we match ``&gt;``, the escaped form of ``>``).
|
||||
5. Paragraph split on double newline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
|
||||
# ``((…))`` — an out-of-character aside.  ``[^)]*?`` cannot cross a ``)``,
# so a match never runs past the first closing paren; newlines are allowed,
# which lets a multi-line aside be wrapped as a single unit.
_OOC_PATTERN = re.compile(r"\(\([^)]*?\)\)", re.DOTALL)

# Stand-in token for a stashed OOC span: NUL, span index, NUL.  NUL bytes
# are stripped from the input first, so user text can never forge one.
_OOC_PLACEHOLDER = re.compile(r"\x00(\d+)\x00")

# ``**bold**`` — strict: no embedded asterisks or newlines.  Must run
# *before* the single-asterisk action pattern, otherwise ``**x**`` would
# be partly consumed by ``*…*``.
_BOLD_PATTERN = re.compile(r"\*\*([^*\n]+)\*\*")

# ``*action*`` — single-asterisk italics; same restriction as bold.
_ACTION_PATTERN = re.compile(r"\*([^*\n]+)\*")

# ``> line`` at the start of a line.  The pass runs after ``html.escape``,
# so the marker to look for is the *escaped* form of ``>``.  It is derived
# from ``html.escape`` itself so the pattern and the escaper cannot drift.
_BLOCKQUOTE_PATTERN = re.compile(
    rf"^{re.escape(html.escape('>'))}\s?(.+)$", re.MULTILINE
)


def _render_paragraph(paragraph: str) -> list[str]:
    """Turn one blank-line-delimited chunk into block-level HTML fragments.

    Quote lines become ``<blockquote>`` elements and the runs of ordinary
    lines between them become ``<p>`` elements.  Blockquotes are emitted as
    siblings of paragraphs, never inside one, because ``<p>`` may not
    contain block-level children.
    """
    fragments: list[str] = []
    pending: list[str] = []

    def flush_pending() -> None:
        body = "\n".join(pending).strip()
        if body:
            fragments.append(f"<p>{body}</p>")
        pending.clear()

    for line in paragraph.strip().split("\n"):
        # Matching line by line keeps ``\s?`` from swallowing a newline and
        # quoting the *following* line when a bare marker stands alone.
        quote = _BLOCKQUOTE_PATTERN.fullmatch(line)
        if quote is None:
            pending.append(line)
            continue
        flush_pending()
        fragments.append(f"<blockquote>{quote.group(1)}</blockquote>")
    flush_pending()
    return fragments


def render_prose(text: str) -> str:
    """Render transcript prose to safe HTML.

    The input is HTML-escaped first; only the wrapper tags added here
    (``<p>``, ``<blockquote>``, ``<strong>``, ``<em class="action">``,
    ``<span class="ooc">``) appear unescaped in the result.

    Args:
        text: Raw prose.  CRLF/CR line endings are normalised to LF and NUL
            characters are dropped.

    Returns:
        The rendered HTML, or an empty string for empty/whitespace-only
        input so callers can append the result without stray ``<p></p>``.
    """
    if not text:
        return ""

    # Normalise line endings so paragraph splitting on ``\n\n`` works for
    # input pasted from Windows clients; drop NULs, which are reserved for
    # the OOC placeholders below.
    text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\x00", "")
    if not text.strip():
        return ""

    escaped = html.escape(text)

    # Stash each OOC aside behind an opaque placeholder so that the
    # bold/action passes cannot interpret asterisks inside it, and so that
    # neither an emphasis match nor a paragraph break can cut through the
    # span and leave mis-nested tags.
    ooc_spans: list[str] = []

    def stash_ooc(match: re.Match[str]) -> str:
        ooc_spans.append(f'<span class="ooc">{match.group(0)}</span>')
        return f"\x00{len(ooc_spans) - 1}\x00"

    marked = _OOC_PATTERN.sub(stash_ooc, escaped)

    # Bold strictly before action (the stricter pattern must win).
    marked = _BOLD_PATTERN.sub(r"<strong>\1</strong>", marked)
    marked = _ACTION_PATTERN.sub(r'<em class="action">\1</em>', marked)

    fragments: list[str] = []
    for paragraph in marked.split("\n\n"):
        fragments.extend(_render_paragraph(paragraph))

    # Put the stashed OOC spans back now that all structure is in place.
    return _OOC_PLACEHOLDER.sub(
        lambda m: ooc_spans[int(m.group(1))], "".join(fragments)
    )
|
||||
|
||||
|
||||
def render_turn_html(speaker: str, text: str, role: str = "bot") -> str:
    """Build the ``<div class="turn turn-ROLE">…</div>`` fragment for one turn.

    Args:
        speaker: Label shown in the leading ``<strong>``; HTML-escaped here.
        text: Turn body; passed through :func:`render_prose`.
        role: Suffix of the ``turn-…`` CSS class (e.g. ``you`` or ``bot``);
            HTML-escaped here as well.

    Returns:
        The complete turn markup as a single string.
    """
    label = html.escape(speaker)
    css_role = html.escape(role)
    prose = render_prose(text)
    pieces = (
        f'<div class="turn turn-{css_role}">',
        f"<strong>{label}</strong>",
        prose,
        "</div>",
    )
    return "".join(pieces)
|
||||
+1
-10
@@ -53,6 +53,7 @@ from chat.state.world import active_scene, get_chat, get_container
|
||||
from chat.web.bots import get_conn
|
||||
from chat.web.kickoff import get_llm_client
|
||||
from chat.web.pubsub import publish
|
||||
from chat.web.render import render_turn_html as _render_turn_html
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -102,16 +103,6 @@ def _read_recent_dialogue(conn, chat_id: str, limit: int = 200) -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _render_turn_html(speaker_label: str, text: str, *, role: str) -> str:
|
||||
"""Render a single turn as a small HTML fragment (escaped)."""
|
||||
return (
|
||||
f'<div class="turn turn-{role}">'
|
||||
f"<strong>{html.escape(speaker_label)}</strong>"
|
||||
f"<p>{html.escape(text)}</p>"
|
||||
f"</div>"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/chats/{chat_id}/turns")
|
||||
async def post_turn(
|
||||
chat_id: str,
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
"""Tests for the transcript renderer (Task 33).
|
||||
|
||||
Lightweight markdown for transcript turns:
|
||||
- ``*action*`` → ``<em class="action">action</em>``
|
||||
- ``**bold**`` → ``<strong>bold</strong>``
|
||||
- ``((ooc))`` → ``<span class="ooc">((ooc))</span>``
|
||||
- ``> line`` → ``<blockquote>line</blockquote>``
|
||||
- paragraph breaks (double newline) → ``</p><p>``
|
||||
- everything HTML-escaped first
|
||||
|
||||
No headings, no code blocks, no links — out of scope per Requirements §16.3.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from chat.web.render import render_prose, render_turn_html
|
||||
|
||||
|
||||
def test_render_prose_escapes_html():
    """Raw HTML in user content must be escaped — no XSS surface."""
    out = render_prose("<script>alert(1)</script>")
    # The live tag must be gone and its entity-escaped form present.
    assert "<script>" not in out
    assert "&lt;script&gt;" in out
|
||||
|
||||
|
||||
def test_render_prose_action_to_italic():
    """A single-asterisk span becomes an ``<em class="action">`` element."""
    rendered = render_prose("*walks over*")
    expected = '<em class="action">walks over</em>'
    assert expected in rendered
|
||||
|
||||
|
||||
def test_render_prose_bold_before_action():
    """``**bold**`` is consumed before ``*action*`` so neither eats the other."""
    rendered = render_prose("**emphasis** and *action*")
    for fragment in (
        "<strong>emphasis</strong>",
        '<em class="action">action</em>',
    ):
        assert fragment in rendered
    # Every asterisk must have been consumed as markup.
    assert rendered.count("*") == 0
|
||||
|
||||
|
||||
def test_render_prose_ooc_wrapped():
    """An OOC aside is wrapped in the ``ooc`` span and keeps its parens."""
    rendered = render_prose("((this is OOC))")
    for fragment in ('<span class="ooc">', "((this is OOC))"):
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_render_prose_paragraphs():
    """A blank line separates paragraphs, each wrapped in its own ``<p>``."""
    rendered = render_prose("First.\n\nSecond.")
    # Exactly two paragraph elements, each opened and closed once.
    assert (rendered.count("<p>"), rendered.count("</p>")) == (2, 2)
    for paragraph in ("<p>First.</p>", "<p>Second.</p>"):
        assert paragraph in rendered
|
||||
|
||||
|
||||
def test_render_prose_blockquote():
    """A line starting with ``>`` is rendered as a ``<blockquote>``."""
    rendered = render_prose("> a quote")
    expected = "<blockquote>a quote</blockquote>"
    assert expected in rendered
|
||||
|
||||
|
||||
def test_render_prose_empty():
    """Empty and whitespace-only input render to nothing — no stray tags."""
    for blank in ("", " "):
        assert render_prose(blank) == ""
|
||||
|
||||
|
||||
def test_render_turn_html_includes_role_class():
    """The turn wrapper carries the role class, the speaker and the body."""
    fragment = render_turn_html("BotA", "Hello.", role="bot")
    for part in ('class="turn turn-bot"', "<strong>BotA</strong>", "Hello."):
        assert part in fragment
|
||||
|
||||
|
||||
def test_render_turn_html_escapes_speaker():
    """Speaker label is also HTML-escaped — names are user-controlled."""
    out = render_turn_html("<bad>", "hi", role="you")
    # The raw tag must not survive; its entity-escaped form must be present.
    assert "<bad>" not in out
    assert "&lt;bad&gt;" in out
|
||||
|
||||
|
||||
def test_render_prose_mixed_full_message():
    """Realistic turn with action, dialogue, and an OOC aside."""
    text = "*looks up* \"You're back late.\" ((she's tired))"
    out = render_prose(text)
    assert '<em class="action">looks up</em>' in out
    # The apostrophe in ``she's`` is HTML-escaped to ``&#x27;`` before the
    # OOC span is wrapped, so the expected literal carries the entity.
    assert '<span class="ooc">((she&#x27;s tired))</span>' in out
|
||||
Reference in New Issue
Block a user