feat: regenerate with edit-then-regenerate inline UX

2026-04-26 14:04:02 -04:00
parent aa0563b4fa
commit 46062973c2
3 changed files with 607 additions and 0 deletions
@@ -0,0 +1,282 @@
+"""Regenerate flow (T29).
+
+The user clicks "Regenerate" on the latest ``assistant_turn``. The UI
+puts the prior ``user_turn`` into inline edit mode and submits to
+:func:`regenerate_assistant_turn` either:
+
+- with **no edit** — we re-run the narrative against the original user
+  prose and append a fresh ``assistant_turn`` superseding the old one;
+- with **edited prose** — we additionally append a ``user_turn_edit``
+  event capturing the new prose, mark the original ``user_turn`` as
+  superseded by the edit, then run the narrative against the edited
+  prose.
+
+Per Requirements §10.2 superseded events are *kept in the log* — the
+display layer hides them. This is what makes rewinding to before a
+regenerate cheap: we just clear ``superseded_by`` on the old row.
+
+The supersede update is one of the rare "direct DB write" exceptions
+documented in the plan: we manipulate metadata fields on the canonical
+event_log row itself rather than projecting through a handler.
+
+Phase 1 simplifications (per the plan's "bound it" guidance):
+
+- Significance pass is *not* re-run on regenerate. The original score
+  remains attached to the prior memory. The state-update pass *is* re-run
+  so affinity/trust/knowledge reflect the new output.
+- The route does not broadcast a fresh ``turn_html`` SSE event; T34
+  polishes UI swaps. The user refreshes the page to see the new turn.
+"""
+
+from __future__ import annotations
+
+import json
+from sqlite3 import Connection
+
+from chat.config import Settings
+from chat.eventlog.log import append_and_apply, append_event
+from chat.services.memory_write import record_turn_memory
+from chat.services.prompt import assemble_narrative_prompt
+from chat.services.state_update import compute_state_update
+from chat.state.edges import get_edge
+from chat.state.entities import get_bot, get_you
+from chat.state.world import active_scene, get_chat
+from chat.web.pubsub import publish
+
+
+def _recent_dialogue(
+    conn: Connection,
+    *,
+    chat_id: str,
+    exclude_ids: set[int],
+    you_name: str,
+    bot_name: str,
+    limit: int = 20,
+) -> list[dict]:
+    """Return up to ``limit`` visible dialogue entries for ``chat_id``, oldest first.
+
+    Only ``user_turn`` / ``user_turn_edit`` / ``assistant_turn`` rows with
+    ``superseded_by IS NULL AND hidden = 0`` are considered, and any row
+    whose id is in ``exclude_ids`` is skipped.
+
+    The ``chat_id`` lives inside ``payload_json``, so it is filtered in
+    Python. The limit is therefore applied *after* that filter (by
+    walking the cursor newest-first and stopping once ``limit`` matching
+    rows are collected) rather than with a SQL ``LIMIT``, which would
+    count rows belonging to other chats.
+    """
+    cur = conn.execute(
+        "SELECT id, kind, payload_json FROM event_log "
+        "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
+        "  AND superseded_by IS NULL AND hidden = 0 "
+        "ORDER BY id DESC"
+    )
+    newest_first: list[dict] = []
+    try:
+        for event_id, kind, payload_json in cur:
+            if event_id in exclude_ids:
+                continue
+            payload = json.loads(payload_json)
+            if payload.get("chat_id") != chat_id:
+                continue
+            if kind == "assistant_turn":
+                entry = {"speaker": bot_name, "text": payload.get("text", "")}
+            else:
+                entry = {"speaker": you_name, "text": payload.get("prose", "")}
+            newest_first.append(entry)
+            if len(newest_first) >= limit:
+                break
+    finally:
+        # We may stop before exhausting the result set; release it
+        # before the caller issues further statements on ``conn``.
+        cur.close()
+    newest_first.reverse()
+    return newest_first
+
+
+async def _rerun_edge_update(
+    conn: Connection,
+    client,
+    *,
+    model: str,
+    chat_id: str,
+    source_id: str,
+    target_id: str,
+    source_name: str,
+    source_persona: str,
+    target_name: str,
+    recent_dialogue: list[dict],
+    last_at,
+) -> None:
+    """Run the state-update pass for one directed edge and append the result.
+
+    Reads the current ``source_id -> target_id`` edge (falling back to
+    affinity/trust 50 and an empty summary when there is none), asks
+    :func:`compute_state_update` for deltas, and records them as an
+    ``edge_update`` event via :func:`append_and_apply`.
+    """
+    edge = get_edge(conn, source_id, target_id) or {
+        "affinity": 50,
+        "trust": 50,
+        "summary": "",
+    }
+    update = await compute_state_update(
+        client,
+        model=model,
+        source_id=source_id,
+        target_id=target_id,
+        source_name=source_name,
+        source_persona=source_persona,
+        target_name=target_name,
+        prior_affinity=edge["affinity"],
+        prior_trust=edge["trust"],
+        prior_summary=edge.get("summary", "") or "",
+        recent_dialogue=recent_dialogue,
+    )
+    append_and_apply(
+        conn,
+        kind="edge_update",
+        payload={
+            "source_id": source_id,
+            "target_id": target_id,
+            "chat_id": chat_id,
+            "affinity_delta": update.affinity_delta,
+            "trust_delta": update.trust_delta,
+            "knowledge_facts": update.knowledge_facts,
+            "last_interaction_at": last_at,
+            "last_interaction_chat_id": chat_id,
+        },
+    )
+
+
+async def regenerate_assistant_turn(
+    conn: Connection,
+    client,
+    *,
+    settings: Settings,
+    chat_id: str,
+    original_assistant_event_id: int,
+    edited_user_prose: str | None = None,
+) -> str:
+    """Regenerate the assistant turn linked to ``original_assistant_event_id``.
+
+    When ``edited_user_prose`` is provided the original user_turn is also
+    superseded by a fresh ``user_turn_edit`` event capturing the new
+    prose. Each streamed chunk is published as a ``token`` event on the
+    chat's pubsub channel. Afterwards a new ``assistant_turn`` event is
+    appended, the original is marked superseded, and the memory-write
+    and state-update passes are re-run. Returns the new assistant text.
+
+    Raises :class:`ValueError` when the chat cannot be found, or when the
+    assistant_turn event cannot be found *in this chat* — the FastAPI
+    route translates this to 404.
+    """
+    chat = get_chat(conn, chat_id)
+    if chat is None:
+        raise ValueError("chat not found")
+    host_bot_id = chat["host_bot_id"]
+    host_bot = get_bot(conn, host_bot_id) or {
+        "id": host_bot_id,
+        "name": "bot",
+        "persona": "",
+    }
+    bot_name = host_bot.get("name", "bot")
+
+    # 1. Locate the original assistant_turn event and make sure it
+    # belongs to the chat named in the request; otherwise a turn from a
+    # different chat could be superseded through this chat's URL.
+    row = conn.execute(
+        "SELECT payload_json FROM event_log "
+        "WHERE id = ? AND kind = 'assistant_turn'",
+        (original_assistant_event_id,),
+    ).fetchone()
+    if row is None:
+        raise ValueError("assistant_turn event not found")
+    original_assistant_payload = json.loads(row[0])
+    if original_assistant_payload.get("chat_id") != chat_id:
+        raise ValueError("assistant_turn event not found")
+    original_user_turn_id = original_assistant_payload.get("user_turn_id")
+
+    # 2. Determine the prose for the new prompt and (when edited) capture
+    # the user_turn_edit event up front so the new event ids exist before
+    # we link them from the assistant_turn payload.
+    # NOTE(review): if streaming fails below, the edit event and the
+    # supersede mark written here remain unless the caller rolls back —
+    # confirm the connection's transaction handling.
+    new_user_event_id: int | None = None
+    if edited_user_prose is not None:
+        new_user_event_id = append_event(
+            conn,
+            kind="user_turn_edit",
+            payload={
+                "chat_id": chat_id,
+                "prose": edited_user_prose,
+                "supersedes_user_turn_id": original_user_turn_id,
+            },
+        )
+        if original_user_turn_id is not None:
+            conn.execute(
+                "UPDATE event_log SET superseded_by = ? WHERE id = ?",
+                (new_user_event_id, original_user_turn_id),
+            )
+        prose_for_prompt = edited_user_prose
+    else:
+        prose_for_prompt = ""
+        if original_user_turn_id is not None:
+            original_user_row = conn.execute(
+                "SELECT payload_json FROM event_log WHERE id = ?",
+                (original_user_turn_id,),
+            ).fetchone()
+            if original_user_row is not None:
+                prose_for_prompt = json.loads(original_user_row[0]).get(
+                    "prose", ""
+                )
+
+    # The user event the regenerated reply answers: the edit when one was
+    # just created, otherwise the original user turn (may be None).
+    current_user_event_id = (
+        new_user_event_id
+        if new_user_event_id is not None
+        else original_user_turn_id
+    )
+
+    # 3. Build the recent-dialogue slice. Exclude the original
+    # assistant_turn explicitly (we haven't superseded it yet — that
+    # update lands at the end so the new event_id is known) and the
+    # current user event, whose prose is handed to the prompt separately
+    # below. Prior regenerates drop out via the standard
+    # ``superseded_by IS NULL AND hidden = 0`` filter.
+    you_entity = get_you(conn) or {"name": "you", "persona": ""}
+    you_name = you_entity.get("name", "you")
+    exclude_ids = {original_assistant_event_id}
+    if current_user_event_id is not None:
+        exclude_ids.add(current_user_event_id)
+    recent = _recent_dialogue(
+        conn,
+        chat_id=chat_id,
+        exclude_ids=exclude_ids,
+        you_name=you_name,
+        bot_name=bot_name,
+    )
+
+    # 4. Assemble the narrative prompt. ``recent`` excludes the current
+    # user prose, which we pass through ``user_turn_prose``.
+    messages = assemble_narrative_prompt(
+        conn,
+        chat_id=chat_id,
+        speaker_bot_id=host_bot_id,
+        user_turn_prose=prose_for_prompt or None,
+        recent_dialogue=recent,
+        budget_soft=settings.narrative_budget_soft,
+        budget_hard=settings.narrative_budget_hard,
+    )
+
+    # 5. Stream the new narrative, forwarding each chunk to subscribers.
+    accumulated: list[str] = []
+    async for chunk in client.stream(
+        messages, model=settings.narrative_model
+    ):
+        accumulated.append(chunk)
+        await publish(
+            chat_id,
+            {"event": "token", "text": chunk, "speaker_id": host_bot_id},
+        )
+    new_text = "".join(accumulated)
+
+    # 6. Append the new assistant_turn event. ``user_turn_id`` points at
+    # the edit event when one was created, otherwise the original. The
+    # ``regenerated_from`` field is the back-pointer the UI uses for an
+    # "originally said …" affordance.
+    new_assistant_event_id = append_event(
+        conn,
+        kind="assistant_turn",
+        payload={
+            "chat_id": chat_id,
+            "speaker_id": host_bot_id,
+            "text": new_text,
+            "truncated": False,
+            "user_turn_id": current_user_event_id,
+            "regenerated_from": original_assistant_event_id,
+        },
+    )
+
+    # 7. Mark the original assistant_turn as superseded by the new one.
+    conn.execute(
+        "UPDATE event_log SET superseded_by = ? WHERE id = ?",
+        (new_assistant_event_id, original_assistant_event_id),
+    )
+
+    # 8. Re-run downstream classifier passes (memory write + state update
+    # for both directed edges). Significance is intentionally skipped on
+    # regenerate (the prior score remains attached to the prior memory).
+    scene = active_scene(conn, chat_id)
+    last_at = chat.get("time")
+    record_turn_memory(
+        conn,
+        chat_id=chat_id,
+        host_bot_id=host_bot_id,
+        narrative_text=new_text,
+        scene_id=scene["id"] if scene else None,
+        chat_clock_at=last_at,
+    )
+
+    # The classifier sees the full exchange: prior context, the user
+    # prose being answered, then the regenerated reply.
+    recent_for_update = list(recent)
+    if prose_for_prompt:
+        recent_for_update.append({"speaker": you_name, "text": prose_for_prompt})
+    recent_for_update.append({"speaker": bot_name, "text": new_text})
+
+    # Bot -> you first, then you -> bot, each appended before the next
+    # pass starts.
+    await _rerun_edge_update(
+        conn,
+        client,
+        model=settings.classifier_model,
+        chat_id=chat_id,
+        source_id=host_bot_id,
+        target_id="you",
+        source_name=bot_name,
+        source_persona=host_bot.get("persona", "") or "",
+        target_name=you_name,
+        recent_dialogue=recent_for_update,
+        last_at=last_at,
+    )
+    await _rerun_edge_update(
+        conn,
+        client,
+        model=settings.classifier_model,
+        chat_id=chat_id,
+        source_id="you",
+        target_id=host_bot_id,
+        source_name=you_name,
+        source_persona=you_entity.get("persona", "") or "",
+        target_name=bot_name,
+        recent_dialogue=recent_for_update,
+        last_at=last_at,
+    )
+
+    return new_text
+
+
+__all__ = ["regenerate_assistant_turn"]
@@ -460,6 +460,58 @@ async def rewind_preview(
    return HTMLResponse(body)


+# ---------------------------------------------------------------------------
+# Regenerate route (Task 29).
+#
+# A POST that re-streams the most recent assistant turn. The prior
+# ``assistant_turn`` event is kept in the log but flagged
+# ``superseded_by`` so the timeline filter in :func:`_read_recent_dialogue`
+# hides it. When the user supplies ``prose`` the original ``user_turn``
+# is also superseded by a fresh ``user_turn_edit`` event capturing the
+# edit. Significance is *not* re-run on regenerate (per plan §11.1) but
+# state-update + memory writes are.
+# ---------------------------------------------------------------------------
+
+
+@router.post("/chats/{chat_id}/turns/{event_id}/regenerate")
+async def regenerate_turn(
+    chat_id: str,
+    event_id: int,
+    request: Request,
+    prose: str | None = Form(None),
+    conn=Depends(get_conn),
+    client=Depends(get_llm_client),
+):
+    """Regenerate the assistant turn referenced by ``event_id``.
+
+    ``prose`` is an optional form field. When provided (and non-empty)
+    it is forwarded as the edited user prose, so a ``user_turn_edit``
+    event is captured before re-streaming; an empty or missing value
+    means "regenerate against the original prose".
+
+    Returns 204 on success. Responds 404 when the chat is missing, or
+    when the service raises :class:`ValueError` (its signal that the
+    chat or assistant_turn event could not be found). The SSE channel
+    emits per-token events as the new text arrives.
+    """
+    chat = get_chat(conn, chat_id)
+    if chat is None:
+        raise HTTPException(status_code=404, detail=f"chat not found: {chat_id}")
+    settings = request.app.state.settings
+    # Local import keeps the module import graph flat (the service
+    # imports from ``state`` / ``services`` siblings already).
+    from chat.services.regenerate import regenerate_assistant_turn
+
+    # An empty form value is treated the same as "no edit".
+    edited_prose = prose or None
+    try:
+        await regenerate_assistant_turn(
+            conn,
+            client,
+            settings=settings,
+            chat_id=chat_id,
+            original_assistant_event_id=event_id,
+            edited_user_prose=edited_prose,
+        )
+    except ValueError as e:
+        # Chain the cause so the original error stays in the traceback.
+        raise HTTPException(status_code=404, detail=str(e)) from e
+    return Response(status_code=204)
+
+
@router.post("/chats/{chat_id}/rewind/{event_id}")
 async def rewind_execute(
    chat_id: str,
@@ -0,0 +1,273 @@
+"""Regenerate flow (T29).
+
+POST ``/chats/<chat_id>/turns/<event_id>/regenerate`` re-streams the
+assistant turn, supersedes the prior ``assistant_turn`` event, and — when
+prose is supplied — captures a ``user_turn_edit`` event that supersedes
+the original ``user_turn``.
+
+These tests cover the functional core required by the plan:
+
+- Without edit: a new ``assistant_turn`` is appended; the original is
+  marked ``superseded_by`` the new one.
+- With edit: a ``user_turn_edit`` event is appended; the original
+  ``user_turn`` is also marked ``superseded_by``.
+- Missing event id returns 404.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+from fastapi.testclient import TestClient
+
+from chat.app import app
+from chat.db.connection import open_db
+from chat.eventlog.log import append_event
+from chat.eventlog.projector import project
+from chat.llm.mock import MockLLMClient
+
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):
+    """Yield a ``TestClient`` bound to a throwaway config file and database.
+
+    The config and DB paths are injected through ``CHAT_CONFIG_PATH`` and
+    ``CHAT_DB_PATH``. Dependency overrides installed by a test are
+    cleared once the client has shut down.
+    """
+    config_file = tmp_path / "config.toml"
+    database_file = tmp_path / "test.db"
+    config_file.write_text('featherless_api_key = "test"\n')
+    for variable, location in (
+        ("CHAT_CONFIG_PATH", config_file),
+        ("CHAT_DB_PATH", database_file),
+    ):
+        monkeypatch.setenv(variable, str(location))
+
+    with TestClient(app) as test_client:
+        # Switch off the lifespan-managed background worker; left on, it
+        # would try to score significance through Featherless using the
+        # fake test key.
+        if hasattr(app.state, "background_worker"):
+            app.state.background_worker.enabled = False
+        yield test_client
+    app.dependency_overrides.clear()
+
+
+def _seed_with_one_turn(db_path):
+    """Populate a fresh DB with one bot, one chat and a single dialogue round.
+
+    Appends, in order: ``bot_authored``, ``chat_created``, two
+    ``edge_update`` events (bot -> you, you -> bot), two
+    ``activity_change`` events (you, bot), then one ``user_turn`` and the
+    ``assistant_turn`` answering it, and finally runs the projector.
+
+    Returns ``(user_turn_event_id, assistant_turn_event_id)``.
+    """
+
+    def edge(source_id, target_id):
+        # Bare edge_update linking two entities inside the seeded chat.
+        return (
+            "edge_update",
+            {
+                "source_id": source_id,
+                "target_id": target_id,
+                "chat_id": "chat_bot_a",
+            },
+        )
+
+    def activity(entity_id, verb):
+        # Seated entity performing ``verb``; everything else left empty.
+        return (
+            "activity_change",
+            {
+                "entity_id": entity_id,
+                "posture": "sitting",
+                "action": {"verb": verb},
+                "attention": "",
+                "holding": [],
+                "status": {},
+            },
+        )
+
+    setup_events = [
+        (
+            "bot_authored",
+            {
+                "id": "bot_a",
+                "name": "BotA",
+                "persona": "thoughtful",
+                "voice_samples": [],
+                "traits": [],
+                "backstory": "",
+                "initial_relationship_to_you": "",
+                "kickoff_prose": "",
+            },
+        ),
+        (
+            "chat_created",
+            {
+                "id": "chat_bot_a",
+                "host_bot_id": "bot_a",
+                "initial_time": "2026-04-26T20:00:00+00:00",
+                "narrative_anchor": "Day 1",
+                "weather": "",
+            },
+        ),
+        edge("bot_a", "you"),
+        edge("you", "bot_a"),
+        activity("you", "talking"),
+        activity("bot_a", "listening"),
+    ]
+
+    with open_db(db_path) as conn:
+        for event_kind, event_payload in setup_events:
+            append_event(conn, kind=event_kind, payload=event_payload)
+
+        # The single dialogue round the regenerate tests operate on.
+        user_turn_id = append_event(
+            conn,
+            kind="user_turn",
+            payload={
+                "chat_id": "chat_bot_a",
+                "prose": "hello",
+                "segments": [],
+            },
+        )
+        assistant_turn_id = append_event(
+            conn,
+            kind="assistant_turn",
+            payload={
+                "chat_id": "chat_bot_a",
+                "speaker_id": "bot_a",
+                "text": "Original response.",
+                "truncated": False,
+                "user_turn_id": user_turn_id,
+            },
+        )
+        project(conn)
+        return user_turn_id, assistant_turn_id
+
+
+def test_regenerate_without_edit_creates_new_assistant_turn(client, tmp_path):
+    """Issuing the regenerate POST with no prose should:
+
+    - Append a new ``assistant_turn`` carrying ``regenerated_from`` and
+      the canned narrative text.
+    - Mark the original ``assistant_turn`` row as ``superseded_by`` the
+      new one.
+    - Leave the original ``user_turn`` untouched.
+    """
+    db_path = tmp_path / "test.db"
+    user_turn_id, assistant_turn_id = _seed_with_one_turn(db_path)
+
+    # One narrative reply followed by two neutral state-update replies
+    # (one per directed edge).
+    neutral_update = json.dumps(
+        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
+    )
+    scripted = ["New response.", neutral_update, neutral_update]
+
+    from chat.web.kickoff import get_llm_client
+
+    app.dependency_overrides[get_llm_client] = lambda: MockLLMClient(
+        canned=list(scripted)
+    )
+    try:
+        response = client.post(
+            f"/chats/chat_bot_a/turns/{assistant_turn_id}/regenerate", data={}
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+
+    supersede_sql = "SELECT superseded_by FROM event_log WHERE id = ?"
+    with open_db(db_path) as conn:
+        # The original assistant_turn has been superseded.
+        (superseded_by,) = conn.execute(
+            supersede_sql, (assistant_turn_id,)
+        ).fetchone()[:1]
+        assert superseded_by is not None
+
+        # Exactly one live assistant_turn remains besides the original;
+        # it carries the canned text and points back at the original.
+        live_turns = conn.execute(
+            "SELECT id, payload_json FROM event_log "
+            "WHERE kind = 'assistant_turn' AND id != ? "
+            "AND superseded_by IS NULL",
+            (assistant_turn_id,),
+        ).fetchall()
+        assert len(live_turns) == 1
+        replacement_id, replacement_json = live_turns[0]
+        replacement = json.loads(replacement_json)
+        assert replacement["text"] == "New response."
+        assert replacement["regenerated_from"] == assistant_turn_id
+        # The supersede pointer targets that replacement.
+        assert superseded_by == replacement_id
+
+        # With no prose supplied the original user_turn stays live.
+        user_row = conn.execute(supersede_sql, (user_turn_id,)).fetchone()
+        assert user_row[0] is None
+
+
+def test_regenerate_with_edit_appends_user_turn_edit(client, tmp_path):
+    """Supplying ``prose`` should:
+
+    - Append a ``user_turn_edit`` event whose payload references the
+      original user_turn id and carries the edited prose.
+    - Mark the original ``user_turn`` as ``superseded_by`` the edit.
+    - Supersede the original ``assistant_turn`` as well.
+    """
+    db_path = tmp_path / "test.db"
+    user_turn_id, assistant_turn_id = _seed_with_one_turn(db_path)
+
+    # One narrative reply followed by two neutral state-update replies.
+    neutral_update = json.dumps(
+        {"affinity_delta": 0, "trust_delta": 0, "knowledge_facts": []}
+    )
+    scripted = ["Reply to edited.", neutral_update, neutral_update]
+
+    from chat.web.kickoff import get_llm_client
+
+    app.dependency_overrides[get_llm_client] = lambda: MockLLMClient(
+        canned=list(scripted)
+    )
+    try:
+        response = client.post(
+            f"/chats/chat_bot_a/turns/{assistant_turn_id}/regenerate",
+            data={"prose": "edited prose"},
+        )
+        assert response.status_code == 204
+    finally:
+        app.dependency_overrides.clear()
+
+    supersede_sql = "SELECT superseded_by FROM event_log WHERE id = ?"
+    with open_db(db_path) as conn:
+        # Exactly one user_turn_edit exists; it holds the edited prose
+        # and a back-pointer to the original user_turn.
+        edit_rows = conn.execute(
+            "SELECT payload_json FROM event_log WHERE kind = 'user_turn_edit'"
+        ).fetchall()
+        assert len(edit_rows) == 1
+        edit = json.loads(edit_rows[0][0])
+        assert edit["prose"] == "edited prose"
+        assert edit["supersedes_user_turn_id"] == user_turn_id
+        assert edit["chat_id"] == "chat_bot_a"
+
+        # The original user_turn first, then the original
+        # assistant_turn: both must now be superseded.
+        for original_id in (user_turn_id, assistant_turn_id):
+            pointer_row = conn.execute(supersede_sql, (original_id,)).fetchone()
+            assert pointer_row[0] is not None
+
+
+def test_regenerate_404_when_assistant_turn_missing(client, tmp_path):
+    """Regenerating an ``event_id`` that is not in the log yields 404."""
+    _seed_with_one_turn(tmp_path / "test.db")
+
+    from chat.web.kickoff import get_llm_client
+
+    def fake_llm():
+        # Canned replies are supplied even though a 404 is expected.
+        return MockLLMClient(canned=["x", "y", "z"])
+
+    app.dependency_overrides[get_llm_client] = fake_llm
+    try:
+        missing_turn_url = "/chats/chat_bot_a/turns/99999/regenerate"
+        response = client.post(missing_turn_url, data={})
+        assert response.status_code == 404
+    finally:
+        app.dependency_overrides.clear()