# chat/chat/services/regenerate.py

"""Regenerate flow (T29).

The user clicks "Regenerate" on the latest ``assistant_turn``. The UI
puts the prior ``user_turn`` into inline edit mode and submits to
:func:`regenerate_assistant_turn` either:

- with **no edit** — we re-run the narrative against the original user
  prose and append a fresh ``assistant_turn`` superseding the old one;
- with **edited prose** — we additionally append a ``user_turn_edit``
  event capturing the new prose, mark the original ``user_turn`` as
  superseded by the edit, then run the narrative against the edited
  prose.

Per Requirements §10.2 superseded events are *kept in the log* — the
display layer hides them. This is what makes rewinding to before a
regenerate cheap: we just clear ``superseded_by`` on the old row.

The supersede update is one of the rare "direct DB write" exceptions
documented in the plan: we manipulate metadata fields on the canonical
event_log row itself rather than projecting through a handler.

Phase 1 simplifications (per the plan's "bound it" guidance):

- Significance pass is *not* re-run on regenerate. The original score
  remains attached to the prior memory. The state-update pass *is* re-run
  so affinity/trust/knowledge reflect the new output.
- The route does not broadcast a fresh ``turn_html`` SSE event; T34
  polishes UI swaps. The user refreshes the page to see the new turn.
  *(T73.1 closed this gap — see Phase 2.5 changes below.)*

Phase 2 changes (T44):

- Multi-entity prompt assembly: ``guest_id`` is forwarded to the
  prompt assembler so the regenerated narrative sees the same
  guest-aware context the original turn did.
- Multi-witness memory write: ``record_turn_memory_for_present`` fans
  out one ``memory_written`` event per witness when a guest is present.
- Multi-pair state-update: ``compute_state_updates_for_present`` emits
  one ``edge_update`` per directed pair across present entities. With
  three present that's six edges instead of two.
- Interjection regeneration is **deferred to Phase 2.5**. Regenerate
  only re-streams the addressee turn for v2; ``detect_interjection``
  is not invoked here. If the prior turn fired an interjection it
  remains attached to the original assistant_turn (which is superseded
  alongside the regenerated turn) — Phase 2.5 will revisit.
  *(T73.2 closed this gap — see Phase 2.5 changes below.)*

Phase 2.5 changes:

- T73.1: After the new ``assistant_turn`` lands we publish a
  ``turn_html_replace`` SSE event carrying the rendered HTML for the
  regenerated turn plus the original assistant_turn's event_id as
  ``supersedes_id`` so connected tabs can swap the prior DOM node
  in-place. We use a NEW event name (rather than re-using ``turn_html``)
  because the existing HTMX ``sse-swap="turn_html"`` consumer expects a
  raw-HTML body and an *append* semantic; ``turn_html_replace`` is a
  JSON payload (sse.py auto-serialises when extra keys accompany
  ``data``) so the front-end JS can read ``supersedes_id`` and replace
  the right node.
- T73.2: Interjection regeneration. When the original assistant_turn
  group included an interjection beat we redo BOTH the primary and the
  interjection — re-running ``detect_interjection`` against the new
  primary text. If the classifier returns False this time we supersede
  the original interjection without appending a replacement.
- T73.3: The defensive degrade-to-1:1 for stale ``guest_bot_id``
  references was removed — Phase 2 T47 fixed the root cause (resets
  clear the reference) so the guard is dead code.
"""

from __future__ import annotations

import json
from sqlite3 import Connection

from chat.config import Settings
from chat.eventlog.log import append_and_apply, append_event
from chat.services.interjection import detect_interjection
from chat.services.memory_write import record_turn_memory_for_present
from chat.services.multi_state_update import compute_state_updates_for_present
from chat.services.prompt import assemble_narrative_prompt
from chat.state.edges import get_edge
from chat.state.entities import get_bot, get_you
from chat.state.world import active_scene, get_chat
from chat.web.pubsub import publish
from chat.web.render import render_turn_html


# Number of most-recent dialogue events fetched for a prompt window.
_RECENT_DIALOGUE_LIMIT = 20


def _default_edge() -> dict:
    """Return the neutral edge used when a directed pair has no stored row."""
    return {"affinity": 50, "trust": 50, "summary": ""}


def _snapshot_edges(
    conn: Connection, present_ids: list[str]
) -> dict[tuple[str, str], dict]:
    """Return the current edge for every directed pair of present entities."""
    return {
        (src, tgt): get_edge(conn, src, tgt) or _default_edge()
        for src in present_ids
        for tgt in present_ids
        if src != tgt
    }


def _supersede(conn: Connection, *, old_event_id: int, new_event_id: int) -> None:
    """Point ``superseded_by`` of ``old_event_id`` at ``new_event_id``."""
    conn.execute(
        "UPDATE event_log SET superseded_by = ? WHERE id = ?",
        (new_event_id, old_event_id),
    )


def _load_recent_dialogue(
    conn: Connection,
    *,
    chat_id: str,
    exclude_event_ids: list[int],
    you_name: str,
    bot_names: dict,
    fallback_bot_name: str,
) -> list[dict]:
    """Build the oldest-first ``{"speaker", "text"}`` window for ``chat_id``.

    Only live rows (``superseded_by IS NULL AND hidden = 0``) are
    considered, and ``exclude_event_ids`` are dropped explicitly so rows
    that are about to be superseded never reach a prompt.
    """
    exclusion_sql = ""
    if exclude_event_ids:
        # Only ``?`` placeholders are interpolated; values stay bound.
        placeholders = ", ".join("?" for _ in exclude_event_ids)
        exclusion_sql = f"  AND id NOT IN ({placeholders}) "
    # NOTE(review): the LIMIT applies across *all* chats; the per-chat
    # filter happens in Python below, so activity in other chats can
    # shrink this window. Kept as-is to preserve the existing window size
    # semantics — confirm whether post_turn behaves the same way.
    cur = conn.execute(
        "SELECT kind, payload_json FROM event_log "
        "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
        f"{exclusion_sql}"
        "  AND superseded_by IS NULL AND hidden = 0 "
        "ORDER BY id DESC LIMIT ?",
        (*exclude_event_ids, _RECENT_DIALOGUE_LIMIT),
    )
    dialogue: list[dict] = []
    for kind, payload_json in reversed(cur.fetchall()):
        payload = json.loads(payload_json)
        if payload.get("chat_id") != chat_id:
            continue
        if kind in ("user_turn", "user_turn_edit"):
            dialogue.append(
                {"speaker": you_name, "text": payload.get("prose", "")}
            )
        else:
            speaker_id = payload.get("speaker_id", "bot")
            dialogue.append(
                {
                    "speaker": bot_names.get(speaker_id, fallback_bot_name),
                    "text": payload.get("text", ""),
                }
            )
    return dialogue


async def _stream_turn(
    client,
    messages,
    *,
    settings: Settings,
    chat_id: str,
    speaker_id: str,
) -> str:
    """Stream one narrative turn, publishing each chunk as a ``token`` event."""
    pieces: list[str] = []
    async for chunk in client.stream(
        messages,
        model=settings.narrative_model,
        max_tokens=settings.narrative_max_tokens,
        temperature=settings.narrative_temperature,
    ):
        pieces.append(chunk)
        await publish(
            chat_id,
            {"event": "token", "text": chunk, "speaker_id": speaker_id},
        )
    return "".join(pieces)


async def _publish_replacement(
    chat_id: str,
    *,
    speaker_name: str,
    text: str,
    new_event_id: int,
    superseded_event_id: int,
) -> None:
    """Publish a ``turn_html_replace`` event swapping one turn for another."""
    await publish(
        chat_id,
        {
            "event": "turn_html_replace",
            "data": render_turn_html(speaker_name, text, role="bot"),
            "turn_id": new_event_id,
            "supersedes_id": superseded_event_id,
        },
    )


async def _run_state_update_pass(
    conn: Connection,
    client,
    *,
    settings: Settings,
    chat_id: str,
    present_ids: list[str],
    present_names: dict[str, str],
    personas: dict[str, str],
    recent_dialogue: list[dict],
    last_interaction_at,
) -> None:
    """Run the multi-pair state update and apply one ``edge_update`` per result."""
    # Edges are snapshotted immediately before the classifier call so a
    # second pass sees the deltas applied by the first.
    prior_edges = _snapshot_edges(conn, present_ids)
    updates = await compute_state_updates_for_present(
        client,
        classifier_model=settings.classifier_model,
        present_ids=present_ids,
        present_names=present_names,
        personas=personas,
        prior_edges=prior_edges,
        recent_dialogue=recent_dialogue,
        timeout_s=settings.classifier_timeout_s,
    )
    for src_id, tgt_id, update in updates:
        append_and_apply(
            conn,
            kind="edge_update",
            payload={
                "source_id": src_id,
                "target_id": tgt_id,
                "chat_id": chat_id,
                "affinity_delta": update.affinity_delta,
                "trust_delta": update.trust_delta,
                "knowledge_facts": update.knowledge_facts,
                "last_interaction_at": last_interaction_at,
                "last_interaction_chat_id": chat_id,
            },
        )


async def regenerate_assistant_turn(
    conn: Connection,
    client,
    *,
    settings: Settings,
    chat_id: str,
    original_assistant_event_id: int,
    edited_user_prose: str | None = None,
) -> str:
    """Regenerate the assistant turn linked to ``original_assistant_event_id``.

    The original ``assistant_turn`` row is kept in the log and marked
    ``superseded_by`` the freshly appended one. When ``edited_user_prose``
    is provided a ``user_turn_edit`` event is appended first and the
    original ``user_turn`` is superseded by it. If the original turn group
    carried an interjection beat, that beat is either replaced (classifier
    says interject again) or superseded by the new primary (classifier
    says no, or the chat no longer has a guest who could interject).

    :param conn: SQLite connection holding the ``event_log`` table.
    :param client: LLM client; must expose an async-iterable ``stream``.
    :param settings: Narrative / classifier model settings and budgets.
    :param chat_id: Chat the regenerated turn belongs to.
    :param original_assistant_event_id: ``event_log.id`` of the
        ``assistant_turn`` being regenerated.
    :param edited_user_prose: Replacement user prose, or ``None`` to re-run
        against the original user turn.
    :returns: The text of the new primary assistant turn.
    :raises ValueError: When the chat or the assistant_turn event cannot be
        found, or the event belongs to a different chat — the FastAPI route
        translates this to 404.
    """
    chat = get_chat(conn, chat_id)
    if chat is None:
        raise ValueError("chat not found")
    host_bot_id = chat["host_bot_id"]
    host_bot = get_bot(conn, host_bot_id) or {
        "id": host_bot_id,
        "name": "bot",
        "persona": "",
    }

    # Phase 2: surface the guest (if any) so the prompt assembler and
    # downstream multi-entity passes see the same shape post_turn does.
    # Phase 2 T47 made bot_reset cascade-clear ``chat.guest_bot_id`` when
    # the referenced bot is purged, so the column is trusted here.
    guest_bot_id = chat.get("guest_bot_id")
    guest_bot: dict | None = (
        get_bot(conn, guest_bot_id) if guest_bot_id is not None else None
    )

    # 1. Locate the original assistant_turn event.
    row = conn.execute(
        "SELECT payload_json FROM event_log "
        "WHERE id = ? AND kind = 'assistant_turn'",
        (original_assistant_event_id,),
    ).fetchone()
    if row is None:
        raise ValueError("assistant_turn event not found")
    original_assistant_payload = json.loads(row[0])
    # Refuse to touch an event that belongs to another chat: everything
    # below mutates ``superseded_by`` on rows reachable from this event.
    # Rows without a ``chat_id`` are let through for backward compatibility.
    owning_chat_id = original_assistant_payload.get("chat_id")
    if owning_chat_id is not None and owning_chat_id != chat_id:
        raise ValueError("assistant_turn event not found")
    original_user_turn_id = original_assistant_payload.get("user_turn_id")

    # 1a. Look up any sibling interjection beat in the same turn group
    # (T73.2). Primary and interjection are pinned to the same
    # ``user_turn_id``; the interjection carries a populated
    # ``interjection_of``. ``superseded_by IS NULL`` keeps interjections
    # from earlier regenerates of this group out of the result.
    original_interjection_event_id: int | None = None
    if original_user_turn_id is not None:
        sibling_cur = conn.execute(
            "SELECT id, payload_json FROM event_log "
            "WHERE kind = 'assistant_turn' "
            "  AND id != ? "
            "  AND superseded_by IS NULL "
            "ORDER BY id",
            (original_assistant_event_id,),
        )
        for sib_id, sib_payload_json in sibling_cur.fetchall():
            sib_payload = json.loads(sib_payload_json)
            if (
                sib_payload.get("user_turn_id") == original_user_turn_id
                and sib_payload.get("interjection_of")
            ):
                original_interjection_event_id = sib_id
                break

    # Preserve whichever bot the original turn was attributed to, falling
    # back to the host for legacy rows that pre-date multi-entity support.
    speaker_bot_id = original_assistant_payload.get("speaker_id") or host_bot_id
    if speaker_bot_id == host_bot_id:
        speaker_bot = host_bot
    elif guest_bot is not None and speaker_bot_id == guest_bot.get("id"):
        speaker_bot = guest_bot
    else:
        speaker_bot = get_bot(conn, speaker_bot_id) or host_bot
        speaker_bot_id = speaker_bot.get("id", host_bot_id)
    speaker_name = speaker_bot.get("name", "bot")

    # 2. Determine the prose for the new prompt and (when edited) capture
    # the user_turn_edit event up front so its id exists before the new
    # assistant_turn payload links to it.
    new_user_event_id: int | None = None
    if edited_user_prose is not None:
        new_user_event_id = append_event(
            conn,
            kind="user_turn_edit",
            payload={
                "chat_id": chat_id,
                "prose": edited_user_prose,
                "supersedes_user_turn_id": original_user_turn_id,
            },
        )
        if original_user_turn_id is not None:
            _supersede(
                conn,
                old_event_id=original_user_turn_id,
                new_event_id=new_user_event_id,
            )
        prose_for_prompt = edited_user_prose
    else:
        prose_for_prompt = ""
        if original_user_turn_id is not None:
            original_user_row = conn.execute(
                "SELECT payload_json FROM event_log WHERE id = ?",
                (original_user_turn_id,),
            ).fetchone()
            if original_user_row is not None:
                prose_for_prompt = json.loads(original_user_row[0]).get(
                    "prose", ""
                )
    # The user event the regenerated group hangs off: the edit when one was
    # created, otherwise the original user turn.
    linked_user_turn_id = (
        new_user_event_id
        if new_user_event_id is not None
        else original_user_turn_id
    )

    # 3. Build the recent-dialogue slice. Neither the original primary nor
    # its interjection has been superseded yet (those updates land once
    # the replacement ids are known), so both are excluded explicitly —
    # otherwise the stale beats would be fed back into the new prompts.
    you_entity = get_you(conn) or {"name": "you", "persona": ""}
    you_name = you_entity.get("name", "you")
    host_name = host_bot.get("name", "bot")
    bot_names: dict = {}
    if guest_bot is not None:
        bot_names[guest_bot.get("id")] = guest_bot.get("name", "bot")
    bot_names[host_bot_id] = host_name  # host wins on an id clash
    replaced_event_ids = [original_assistant_event_id]
    if original_interjection_event_id is not None:
        replaced_event_ids.append(original_interjection_event_id)
    recent = _load_recent_dialogue(
        conn,
        chat_id=chat_id,
        exclude_event_ids=replaced_event_ids,
        you_name=you_name,
        bot_names=bot_names,
        fallback_bot_name=host_name,
    )

    # 4. Assemble the narrative prompt. Phase 2: forward ``guest_id`` so
    # the prompt sees the third party.
    # NOTE(review): the current user event is live and not excluded above,
    # so ``recent`` normally ends with the same prose that is also passed
    # as ``user_turn_prose`` — confirm assemble_narrative_prompt tolerates
    # or de-duplicates this.
    messages = assemble_narrative_prompt(
        conn,
        chat_id=chat_id,
        speaker_bot_id=speaker_bot_id,
        user_turn_prose=prose_for_prompt or None,
        recent_dialogue=recent,
        budget_soft=settings.narrative_budget_soft,
        budget_hard=settings.narrative_budget_hard,
        guest_id=guest_bot_id,
    )

    # 5. Stream the new narrative.
    new_text = await _stream_turn(
        client,
        messages,
        settings=settings,
        chat_id=chat_id,
        speaker_id=speaker_bot_id,
    )

    # 6. Append the new assistant_turn event. ``regenerated_from`` is the
    # back-pointer the UI uses for an "originally said …" affordance.
    new_assistant_event_id = append_event(
        conn,
        kind="assistant_turn",
        payload={
            "chat_id": chat_id,
            "speaker_id": speaker_bot_id,
            "text": new_text,
            "truncated": False,
            "user_turn_id": linked_user_turn_id,
            "regenerated_from": original_assistant_event_id,
        },
    )

    # 7. Mark the original assistant_turn as superseded by the new one.
    _supersede(
        conn,
        old_event_id=original_assistant_event_id,
        new_event_id=new_assistant_event_id,
    )

    # 7a. Broadcast a turn_html_replace SSE event so connected tabs can
    # swap the prior assistant_turn DOM node in-place (T73.1). A separate
    # event name from post_turn's append-only ``turn_html`` is used
    # because regenerate is a *replace* operation.
    await _publish_replacement(
        chat_id,
        speaker_name=speaker_name,
        text=new_text,
        new_event_id=new_assistant_event_id,
        superseded_event_id=original_assistant_event_id,
    )

    # 8. Re-run downstream passes for the new primary (memory write +
    # state update for every directed pair across present entities).
    # Significance is intentionally skipped on regenerate.
    scene = active_scene(conn, chat_id)
    scene_id = scene["id"] if scene else None
    last_at = chat.get("time")
    record_turn_memory_for_present(
        conn,
        chat_id=chat_id,
        host_bot_id=host_bot_id,
        guest_bot_id=guest_bot_id,
        narrative_text=new_text,
        scene_id=scene_id,
        chat_clock_at=last_at,
    )

    recent_for_update = recent + [{"speaker": speaker_name, "text": new_text}]

    # Host first preserves the Phase 1 directed-pair order (host->you,
    # then you->host) so existing canned-response fixtures still line up.
    present_ids: list[str] = [host_bot_id, "you"]
    present_names: dict[str, str] = {host_bot_id: host_name, "you": you_name}
    personas: dict[str, str] = {
        host_bot_id: host_bot.get("persona") or "",
        "you": you_entity.get("persona") or "",
    }
    if guest_bot is not None and guest_bot_id is not None:
        present_ids.append(guest_bot_id)
        present_names[guest_bot_id] = guest_bot.get("name", "bot")
        personas[guest_bot_id] = guest_bot.get("persona") or ""

    await _run_state_update_pass(
        conn,
        client,
        settings=settings,
        chat_id=chat_id,
        present_ids=present_ids,
        present_names=present_names,
        personas=personas,
        recent_dialogue=recent_for_update,
        last_interaction_at=last_at,
    )

    # 9. Interjection regenerate branch (T73.2). Nothing to do when the
    # original group had no interjection beat.
    if original_interjection_event_id is None:
        return new_text

    if guest_bot is None:
        # The guest is gone, so nobody can interject. Supersede the stale
        # beat by the new primary so it does not linger in the timeline
        # attached to a primary that no longer exists.
        _supersede(
            conn,
            old_event_id=original_interjection_event_id,
            new_event_id=new_assistant_event_id,
        )
        return new_text

    # The silent witness is whichever bot is not the new primary speaker;
    # derive it from the present pair rather than reusing the prior
    # interjection's speaker_id verbatim.
    silent_witness = guest_bot if speaker_bot_id == host_bot_id else host_bot
    silent_witness_id = silent_witness.get("id")
    silent_witness_name = silent_witness.get("name", "bot")

    decision = await detect_interjection(
        client,
        classifier_model=settings.classifier_model,
        addressee_name=speaker_name,
        addressee_just_said=new_text,
        silent_witness_name=silent_witness_name,
        silent_witness_persona=silent_witness.get("persona") or "",
        silent_witness_edge_to_addressee=(
            get_edge(conn, silent_witness_id, speaker_bot_id) or _default_edge()
        ),
        silent_witness_edge_to_you=(
            get_edge(conn, silent_witness_id, "you") or _default_edge()
        ),
        you_just_said=prose_for_prompt or "",
        timeout_s=settings.classifier_timeout_s,
    )

    if not decision.should_interject:
        # No follow-on this time: supersede the original interjection
        # without a replacement. Pointing at the new primary keeps the row
        # hidden by the standard ``superseded_by IS NULL`` filter.
        _supersede(
            conn,
            old_event_id=original_interjection_event_id,
            new_event_id=new_assistant_event_id,
        )
        return new_text

    # Re-read the window so the just-appended primary is in the prompt.
    # The original primary is already superseded; the original
    # interjection is not yet, so it is excluded explicitly.
    interject_recent = _load_recent_dialogue(
        conn,
        chat_id=chat_id,
        exclude_event_ids=[original_interjection_event_id],
        you_name=you_name,
        bot_names=bot_names,
        fallback_bot_name=host_name,
    )
    # Drop a trailing user entry: the user prose is passed separately via
    # ``user_turn_prose`` below.
    if interject_recent and interject_recent[-1].get("speaker") == you_name:
        interject_recent = interject_recent[:-1]

    interject_messages = assemble_narrative_prompt(
        conn,
        chat_id=chat_id,
        speaker_bot_id=silent_witness_id,
        addressee=speaker_bot_id,
        user_turn_prose=prose_for_prompt or None,
        recent_dialogue=interject_recent,
        budget_soft=settings.narrative_budget_soft,
        budget_hard=settings.narrative_budget_hard,
        guest_id=guest_bot_id,
    )
    interject_text = await _stream_turn(
        client,
        interject_messages,
        settings=settings,
        chat_id=chat_id,
        speaker_id=silent_witness_id,
    )

    new_interjection_event_id = append_event(
        conn,
        kind="assistant_turn",
        payload={
            "chat_id": chat_id,
            "speaker_id": silent_witness_id,
            "text": interject_text,
            "truncated": False,
            "user_turn_id": linked_user_turn_id,
            "regenerated_from": original_interjection_event_id,
            "interjection_of": speaker_bot_id,
        },
    )

    # Supersede the original interjection by the new one and tell
    # connected tabs to swap the node (mirrors the primary swap in 7a).
    _supersede(
        conn,
        old_event_id=original_interjection_event_id,
        new_event_id=new_interjection_event_id,
    )
    await _publish_replacement(
        chat_id,
        speaker_name=silent_witness_name,
        text=interject_text,
        new_event_id=new_interjection_event_id,
        superseded_event_id=original_interjection_event_id,
    )

    # Memory write + state update for the new interjection beat. The
    # state-update pass re-snapshots edges, so its deltas land on the
    # post-primary baseline.
    record_turn_memory_for_present(
        conn,
        chat_id=chat_id,
        host_bot_id=host_bot_id,
        guest_bot_id=guest_bot_id,
        narrative_text=interject_text,
        scene_id=scene_id,
        chat_clock_at=last_at,
    )
    await _run_state_update_pass(
        conn,
        client,
        settings=settings,
        chat_id=chat_id,
        present_ids=present_ids,
        present_names=present_names,
        personas=personas,
        recent_dialogue=recent_for_update
        + [{"speaker": silent_witness_name, "text": interject_text}],
        last_interaction_at=last_at,
    )

    return new_text


__all__ = ["regenerate_assistant_turn"]