feat: narrative streaming via SSE with assistant_turn event
This commit is contained in:
@@ -20,6 +20,7 @@ from chat.web.kickoff import router as kickoff_router
|
||||
from chat.web.nav import router as nav_router
|
||||
from chat.web.settings import router as settings_router
|
||||
from chat.web.sse import router as sse_router
|
||||
from chat.web.turns import router as turns_router
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -42,6 +43,7 @@ app.include_router(settings_router)
|
||||
app.include_router(nav_router)
|
||||
app.include_router(chat_router)
|
||||
app.include_router(sse_router)
|
||||
app.include_router(turns_router)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
{% extends "layout.html" %}
|
||||
{% block title %}{{ host_bot.name }} - chat{% endblock %}
|
||||
{% block content %}
|
||||
<div class="chat-shell" data-chat-id="{{ chat.id }}">
|
||||
<div class="chat-shell" data-chat-id="{{ chat.id }}"
|
||||
hx-ext="sse"
|
||||
sse-connect="/chats/{{ chat.id }}/events">
|
||||
<header class="chat-header">
|
||||
<h1>{{ host_bot.name }}</h1>
|
||||
<div class="chat-meta muted">{{ chat.time }}</div>
|
||||
<button class="drawer-toggle" type="button" aria-controls="drawer" aria-expanded="false">Drawer</button>
|
||||
</header>
|
||||
|
||||
<section class="timeline" id="timeline">
|
||||
<section class="timeline" id="timeline"
|
||||
sse-swap="turn_html"
|
||||
hx-swap="beforeend">
|
||||
{% if not turns %}
|
||||
<p class="muted">No turns yet. Start typing below.</p>
|
||||
{% else %}
|
||||
|
||||
+13
-2
@@ -16,6 +16,7 @@ from fastapi.templating import Jinja2Templates
|
||||
from chat.state.entities import get_bot
|
||||
from chat.state.world import get_chat
|
||||
from chat.web.bots import get_conn
|
||||
from chat.web.turns import _read_recent_dialogue
|
||||
|
||||
TEMPLATES = Jinja2Templates(
|
||||
directory=str(Path(__file__).resolve().parent.parent / "templates")
|
||||
@@ -38,9 +39,19 @@ async def chat_detail(chat_id: str, request: Request, conn=Depends(get_conn)):
|
||||
status_code=404, detail=f"host bot not found: {chat['host_bot_id']}"
|
||||
)
|
||||
|
||||
# Phase 1, T15: timeline starts empty. T19 will populate from event_log
|
||||
# by reading user_turn / assistant_turn events for this chat.
|
||||
# T19: render the timeline from event_log. We pull both user_turn and
|
||||
# assistant_turn events for this chat, in chronological order. Each row
|
||||
# is shaped ``{"speaker": ..., "text": ...}`` and the template
|
||||
# discriminates roles via the speaker id (the literal "you" vs. a bot id).
|
||||
raw_turns = _read_recent_dialogue(conn, chat_id, limit=200)
|
||||
turns: list[dict] = []
|
||||
for t in raw_turns:
|
||||
if t["speaker"] == "you":
|
||||
turns.append({"role": "you", "speaker": "you", "text": t["text"]})
|
||||
else:
|
||||
bot = get_bot(conn, t["speaker"])
|
||||
label = bot["name"] if bot else t["speaker"]
|
||||
turns.append({"role": "bot", "speaker": label, "text": t["text"]})
|
||||
|
||||
return TEMPLATES.TemplateResponse(
|
||||
request,
|
||||
|
||||
+21
-5
@@ -32,9 +32,17 @@ router = APIRouter()
|
||||
_KEEPALIVE_SECONDS = 15.0
|
||||
|
||||
|
||||
def _format_sse(event: str, data: dict) -> bytes:
|
||||
"""Format a single SSE frame: ``event: <name>\\ndata: <json>\\n\\n``."""
|
||||
payload = json.dumps(data)
|
||||
def _format_sse(event: str, data: dict | str) -> bytes:
    """Format a single SSE frame: ``event: <name>\\ndata: <body>\\n\\n``.

    ``data`` may be a dict (JSON-serialized) or a raw string. The string
    form is used for HTMX SSE swaps where the payload is an HTML
    fragment that the client splices into the DOM verbatim.

    A payload that contains line breaks is emitted as one ``data:`` field
    per line. Writing it after a single ``data:`` prefix would corrupt the
    frame: a blank line inside the payload ends the event early and the
    remaining lines are parsed as unknown fields. The receiving side joins
    consecutive ``data:`` fields back together with ``\\n``.

    Returns the UTF-8 encoded frame, terminated by a blank line.
    """
    payload = data if isinstance(data, str) else json.dumps(data)
    # SSE treats CRLF, CR and LF all as line terminators, so normalise to LF
    # before splitting; a stray CR would otherwise break the frame as well.
    lines = payload.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    body = "".join(f"data: {line}\n" for line in lines)
    return f"event: {event}\n{body}\n".encode("utf-8")
|
||||
|
||||
|
||||
@@ -62,10 +70,18 @@ async def chat_events(chat_id: str, request: Request, conn=Depends(get_conn)):
|
||||
yield b": keepalive\n\n"
|
||||
continue
|
||||
# Allow publishers to set the SSE event name via "event" key;
|
||||
# default to "message" if omitted.
|
||||
# default to "message" if omitted. When the remaining payload
|
||||
# is a single ``data`` string, send it verbatim — that lets
|
||||
# turn-flow publishers ship pre-rendered HTML fragments that
|
||||
# HTMX's SSE extension can swap into the DOM directly.
|
||||
event = dict(event) # don't mutate the published dict
|
||||
kind = event.pop("event", "message")
|
||||
yield _format_sse(kind, event)
|
||||
if set(event.keys()) == {"data"} and isinstance(
|
||||
event["data"], str
|
||||
):
|
||||
yield _format_sse(kind, event["data"])
|
||||
else:
|
||||
yield _format_sse(kind, event)
|
||||
finally:
|
||||
await unsubscribe(chat_id, queue)
|
||||
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
"""POST ``/chats/<id>/turns`` — narrative turn flow with SSE streaming.
|
||||
|
||||
The turn flow strings together the pieces built in T17 (turn parser), T18
|
||||
(prompt assembler), and T16 (SSE channel):
|
||||
|
||||
1. Parse the user's prose with the classifier into typed segments.
|
||||
2. Append a ``user_turn`` event capturing both the original prose and the
|
||||
parsed segments.
|
||||
3. Append a placeholder ``assistant_turn_started`` marker so observers know
|
||||
a response is in flight.
|
||||
4. Build the narrative prompt, dropping OOC segments before they reach the
|
||||
bot (per Requirements §6.1 the OOC convention is for the author to talk
|
||||
to the system, not to the in-fiction bot).
|
||||
5. Stream tokens from the LLM, broadcasting each chunk over the chat's SSE
|
||||
channel as a ``token`` event so any subscribed browser tab sees them
|
||||
arrive in real time.
|
||||
6. On stream complete, append an ``assistant_turn`` event with the full
|
||||
text and ``truncated=False``. Also publish a ``turn_html`` event with a
|
||||
ready-to-swap HTML fragment so HTMX's SSE extension can append it to
|
||||
the timeline without a page reload.
|
||||
7. Return ``204 No Content`` — the SSE channel is the real conveyor of
|
||||
state, not the POST response body.
|
||||
|
||||
Errors during streaming flip the assistant_turn's ``truncated`` flag to
|
||||
``True`` and we still commit what we received. ``asyncio.CancelledError``
|
||||
is treated identically and re-raised after recording the partial turn.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import html
|
||||
import json
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Request
|
||||
from fastapi.responses import Response
|
||||
|
||||
from chat.eventlog.log import append_event
|
||||
from chat.services.prompt import assemble_narrative_prompt
|
||||
from chat.services.turn_parse import ParsedTurn, parse_turn
|
||||
from chat.state.world import get_chat
|
||||
from chat.state.entities import get_bot
|
||||
from chat.web.bots import get_conn
|
||||
from chat.web.kickoff import get_llm_client
|
||||
from chat.web.pubsub import publish
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _strip_ooc_for_prompt(parsed: ParsedTurn) -> str:
    """Rebuild the prompt prose from every segment that is not out-of-character.

    Segments whose ``kind`` is ``"ooc"`` are skipped; the texts of the
    remaining segments are joined with single spaces and the result is
    stripped of leading/trailing whitespace. An input with no in-fiction
    segments therefore yields the empty string.
    """
    in_fiction: list[str] = []
    for segment in parsed.segments:
        if segment.kind == "ooc":
            # Author-to-system aside: never forwarded to the bot.
            continue
        in_fiction.append(segment.text)
    joined = " ".join(in_fiction)
    return joined.strip()
|
||||
|
||||
|
||||
def _read_recent_dialogue(conn, chat_id: str, limit: int = 200) -> list[dict]:
    """Return the most recent ``user_turn`` / ``assistant_turn`` events of a chat.

    Ordered oldest-first. Rows that are superseded (``superseded_by`` set)
    or hidden (``hidden`` non-zero) are skipped. Each entry is shaped
    ``{"speaker": <id-or-"you">, "text": <prose>}``: user turns use the
    literal speaker ``"you"`` and the payload's ``prose``; assistant turns
    use the payload's ``speaker_id`` (default ``"bot"``) and ``text``.

    ``limit`` caps the number of turns returned *for this chat*. The chat id
    lives inside ``payload_json``, so the filter runs in Python while the
    rows are scanned newest-first; the scan stops as soon as ``limit``
    matching turns have been collected. (Applying ``LIMIT`` in SQL before
    the chat filter would let turns from other chats crowd this chat's
    history out of the window.) A negative ``limit`` means "no cap",
    matching SQLite's ``LIMIT`` semantics.
    """
    cur = conn.execute(
        "SELECT id, kind, payload_json FROM event_log "
        "WHERE kind IN ('user_turn', 'assistant_turn') "
        "  AND superseded_by IS NULL AND hidden = 0 "
        "ORDER BY id DESC"
    )
    newest_first: list[dict] = []
    for _row_id, kind, payload_json in cur:
        if 0 <= limit <= len(newest_first):
            # Enough turns for this chat; no need to scan older rows.
            break
        payload = json.loads(payload_json)
        if payload.get("chat_id") != chat_id:
            continue
        if kind == "user_turn":
            newest_first.append(
                {"speaker": "you", "text": payload.get("prose", "")}
            )
        else:
            newest_first.append(
                {
                    "speaker": payload.get("speaker_id", "bot"),
                    "text": payload.get("text", ""),
                }
            )
    newest_first.reverse()  # back to chronological order
    return newest_first
|
||||
|
||||
|
||||
def _render_turn_html(speaker_label: str, text: str, *, role: str) -> str:
    """Render a single turn as a small HTML fragment (escaped).

    The fragment is ``<div class="turn turn-{role}">`` wrapping the speaker
    label in ``<strong>`` and the turn text in ``<p>``. All three
    interpolated values are HTML-escaped; ``role`` lands inside a quoted
    attribute, so it is escaped too rather than trusted as a literal.
    """
    css_role = html.escape(role, quote=True)
    label_html = html.escape(speaker_label)
    text_html = html.escape(text)
    return (
        f'<div class="turn turn-{css_role}">'
        f"<strong>{label_html}</strong>"
        f"<p>{text_html}</p>"
        f"</div>"
    )
|
||||
|
||||
|
||||
@router.post("/chats/{chat_id}/turns")
async def post_turn(
    chat_id: str,
    request: Request,
    prose: str = Form(""),
    conn=Depends(get_conn),
    client=Depends(get_llm_client),
):
    """Handle one user turn: parse, log, stream the reply, broadcast it.

    Raises ``HTTPException(404)`` when the chat or its host bot does not
    exist. Otherwise the user's prose is classified into segments, a
    ``user_turn`` and an ``assistant_turn_started`` event are appended, and
    the narrative reply is streamed from ``client``; each chunk is published
    on the chat's channel as a ``token`` event.

    Whatever text arrived is then appended as an ``assistant_turn`` event
    and published twice: as ``assistant_turn_complete`` (JSON) and as
    ``turn_html`` (an HTML fragment). If the stream raised, the turn is
    recorded with ``truncated=True`` and the failure is logged. If the
    request was cancelled mid-stream, the partial turn is recorded the same
    way and the original ``asyncio.CancelledError`` is re-raised afterwards.

    Returns an empty ``204`` response on normal completion.
    """
    # Function-local import: keeps this handler self-contained without
    # touching the module's import block.
    import logging

    chat = get_chat(conn, chat_id)
    if chat is None:
        raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}")

    host_bot = get_bot(conn, chat["host_bot_id"])
    if host_bot is None:
        # Defensive: chat row references a missing bot.
        raise HTTPException(
            status_code=404,
            detail=f"host bot not found: {chat['host_bot_id']}",
        )

    settings = request.app.state.settings

    # 1. Parse turn (classifier).
    parsed = await parse_turn(
        client, model=settings.classifier_model, prose=prose
    )
    prompt_prose = _strip_ooc_for_prompt(parsed)

    # 2. Append user_turn event.
    user_turn_event_id = append_event(
        conn,
        kind="user_turn",
        payload={
            "chat_id": chat_id,
            "prose": prose,
            "segments": [s.model_dump() for s in parsed.segments],
        },
    )

    # 3. Append assistant_turn_started placeholder. ``user_turn``,
    # ``assistant_turn_started``, and ``assistant_turn`` have no registered
    # projector handlers — they live in the event_log purely for transcript
    # rendering — so we don't call ``project`` here. (Re-projecting now would
    # also re-run prior non-idempotent inserts like ``chat_created``.)
    append_event(
        conn,
        kind="assistant_turn_started",
        payload={
            "chat_id": chat_id,
            "speaker_id": host_bot["id"],
            "user_turn_id": user_turn_event_id,
        },
    )

    # 4. Build the narrative prompt.
    recent = _read_recent_dialogue(conn, chat_id, limit=20)
    # Drop the just-appended user turn from ``recent`` — it's passed as
    # ``user_turn_prose`` to the assembler and would otherwise duplicate.
    if recent and recent[-1].get("speaker") == "you":
        recent = recent[:-1]
    messages = assemble_narrative_prompt(
        conn,
        chat_id=chat_id,
        speaker_bot_id=host_bot["id"],
        user_turn_prose=prompt_prose if prompt_prose else None,
        recent_dialogue=recent,
        budget_soft=settings.narrative_budget_soft,
        budget_hard=settings.narrative_budget_hard,
    )

    # 5. Stream and accumulate tokens.
    accumulated: list[str] = []
    truncated = False
    cancel_exc = None
    try:
        async for chunk in client.stream(
            messages, model=settings.narrative_model
        ):
            accumulated.append(chunk)
            await publish(
                chat_id,
                {
                    "event": "token",
                    "text": chunk,
                    "speaker_id": host_bot["id"],
                },
            )
    except asyncio.CancelledError as exc:
        # Preserve the partial output before letting the cancellation
        # propagate so the transcript reflects what the user actually saw.
        # Keep the original exception so its message/traceback survive the
        # deferred re-raise below.
        truncated = True
        cancel_exc = exc
    except Exception:
        # Surface as a truncated turn rather than losing the partial output,
        # but leave a trace of what went wrong instead of failing silently.
        logging.getLogger(__name__).exception(
            "narrative stream failed for chat %s; recording truncated turn",
            chat_id,
        )
        truncated = True

    full_text = "".join(accumulated)

    # 6. Append the assistant_turn with the final text. Like the other
    # transcript-only event kinds it has no projector handler, so
    # ``project`` is not called here either.
    append_event(
        conn,
        kind="assistant_turn",
        payload={
            "chat_id": chat_id,
            "speaker_id": host_bot["id"],
            "text": full_text,
            "truncated": truncated,
            "user_turn_id": user_turn_event_id,
        },
    )

    # 7. Broadcast a JSON completion event (for JS consumers) and an HTML
    # fragment event (for HTMX SSE swap-into-timeline).
    await publish(
        chat_id,
        {
            "event": "assistant_turn_complete",
            "speaker_id": host_bot["id"],
            "text": full_text,
            "truncated": truncated,
        },
    )
    assistant_html = _render_turn_html(
        host_bot["name"], full_text, role="bot"
    )
    await publish(
        chat_id, {"event": "turn_html", "data": assistant_html}
    )

    if cancel_exc is not None:
        # Re-raise after the partial-turn has been recorded.
        raise cancel_exc

    return Response(status_code=204)
|
||||
@@ -0,0 +1,177 @@
|
||||
"""End-to-end turn flow (T19): user POSTs prose, server parses, streams via SSE.
|
||||
|
||||
Covers:
|
||||
- POST ``/chats/<id>/turns`` returns 404 when the chat doesn't exist.
|
||||
- A successful POST appends both a ``user_turn`` and an ``assistant_turn``
|
||||
event in chronological order. The assistant payload carries the full
|
||||
streamed text and ``truncated=False``.
|
||||
- After a turn lands, the chat detail GET renders the user prose and the
|
||||
assistant text from the event log.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from chat.app import app
|
||||
from chat.db.connection import open_db
|
||||
from chat.eventlog.log import append_event
|
||||
from chat.eventlog.projector import project
|
||||
from chat.llm.mock import MockLLMClient
|
||||
|
||||
|
||||
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a ``TestClient`` wired to a temp config/DB and a mock LLM.

    ``CHAT_CONFIG_PATH`` and ``CHAT_DB_PATH`` are pointed at files under
    ``tmp_path``. The LLM dependency is overridden with a ``MockLLMClient``
    whose canned replies are, in order, the classifier output (one
    ``dialogue`` segment, ``"hello"``) and the narrative text
    (``"Hi there."``). The mock is exposed on the client as ``mock_llm``.

    The dependency override is removed in a ``finally`` block so a failure
    while starting or shutting down the app cannot leak the mock into
    later tests.
    """
    cfg = tmp_path / "config.toml"
    cfg.write_text('featherless_api_key = "test"\n')
    monkeypatch.setenv("CHAT_CONFIG_PATH", str(cfg))
    db = tmp_path / "test.db"
    monkeypatch.setenv("CHAT_DB_PATH", str(db))

    canned_parse = json.dumps(
        {"segments": [{"kind": "dialogue", "text": "hello"}]}
    )
    canned_response = "Hi there."

    # Import here so env vars are visible to the dependency lookup.
    from chat.web.kickoff import get_llm_client

    mock = MockLLMClient(canned=[canned_parse, canned_response])
    app.dependency_overrides[get_llm_client] = lambda: mock

    try:
        with TestClient(app) as c:
            c.mock_llm = mock  # type: ignore[attr-defined]
            yield c
    finally:
        app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def _seed(db_path: Path) -> None:
    """Author a bot, create a chat, and seed enough state for prompt assembly.

    Appends, in order: ``bot_authored`` (``bot_a``), ``chat_created``
    (``chat_bot_a``), one ``edge_update`` from the bot to ``you``, and an
    ``activity_change`` for each of ``you`` and ``bot_a``; then runs
    ``project`` once on the same connection.
    """

    def _sitting_activity(entity_id: str, verb: str) -> dict:
        # Both speakers share the same activity shape; only id and verb vary.
        return {
            "entity_id": entity_id,
            "posture": "sitting",
            "action": {
                "verb": verb,
                "interruptible": True,
                "required_attention": "low",
                "expected_duration": "ongoing",
            },
            "attention": "",
            "holding": [],
            "status": {},
        }

    seed_events = [
        (
            "bot_authored",
            {
                "id": "bot_a",
                "name": "BotA",
                "persona": "thoughtful, observant",
                "voice_samples": [],
                "traits": [],
                "backstory": "",
                "initial_relationship_to_you": "",
                "kickoff_prose": "...",
            },
        ),
        (
            "chat_created",
            {
                "id": "chat_bot_a",
                "host_bot_id": "bot_a",
                "initial_time": "2026-04-26T20:00:00+00:00",
                "narrative_anchor": "Day 1",
                "weather": "",
            },
        ),
        # Seed an edge so the prompt assembler has something to render.
        (
            "edge_update",
            {
                "source_id": "bot_a",
                "target_id": "you",
                "chat_id": "chat_bot_a",
                "knowledge_facts": ["coworker"],
            },
        ),
        # Activity for both speakers — required by the prompt assembler.
        ("activity_change", _sitting_activity("you", "talking")),
        ("activity_change", _sitting_activity("bot_a", "listening")),
    ]

    with open_db(db_path) as conn:
        for event_kind, event_payload in seed_events:
            append_event(conn, kind=event_kind, payload=event_payload)
        project(conn)
|
||||
|
||||
|
||||
def test_post_turn_404_when_chat_missing(client):
    """Posting a turn to a chat id that was never created yields HTTP 404."""
    missing_chat_url = "/chats/no_such/turns"
    resp = client.post(missing_chat_url, data={"prose": "hello"})
    assert resp.status_code == 404
|
||||
|
||||
|
||||
def test_post_turn_appends_user_and_assistant_events(client, tmp_path):
    """A successful POST logs exactly one user_turn then one assistant_turn.

    The user payload must carry the original prose and the classifier's
    segments; the assistant payload must carry the full streamed text with
    ``truncated`` set to ``False``.
    """
    db_path = tmp_path / "test.db"
    _seed(db_path)

    resp = client.post("/chats/chat_bot_a/turns", data={"prose": "hello"})
    assert resp.status_code == 204

    turn_query = (
        "SELECT kind, payload_json FROM event_log "
        "WHERE kind IN ('user_turn', 'assistant_turn') ORDER BY id"
    )
    with open_db(db_path) as conn:
        rows = conn.execute(turn_query).fetchall()

    assert len(rows) == 2
    user_row, assistant_row = rows[0], rows[1]
    assert user_row[0] == "user_turn"
    assert assistant_row[0] == "assistant_turn"

    user_payload = json.loads(user_row[1])
    assert user_payload["chat_id"] == "chat_bot_a"
    assert user_payload["prose"] == "hello"
    # Segments come from the canned classifier output.
    dialogue_hits = [
        seg
        for seg in user_payload["segments"]
        if seg.get("kind") == "dialogue" and seg.get("text") == "hello"
    ]
    assert dialogue_hits

    assistant_payload = json.loads(assistant_row[1])
    expected_fields = {
        "chat_id": "chat_bot_a",
        "speaker_id": "bot_a",
        "text": "Hi there.",
    }
    for field, expected in expected_fields.items():
        assert assistant_payload[field] == expected
    assert assistant_payload["truncated"] is False
|
||||
|
||||
|
||||
def test_get_chat_renders_existing_turns(client, tmp_path):
    """After one turn, the chat page shows both the user prose and the reply."""
    _seed(tmp_path / "test.db")

    turn_resp = client.post("/chats/chat_bot_a/turns", data={"prose": "hello"})
    assert turn_resp.status_code == 204

    page = client.get("/chats/chat_bot_a")
    assert page.status_code == 200

    rendered = page.text
    for expected_snippet in ("hello", "Hi there."):
        assert expected_snippet in rendered
|
||||
Reference in New Issue
Block a user