# chat/chat/services/scene_summarize.py

"""Per-POV scene summary and edge summary update on scene close (T27).

When a scene closes — either auto-detected by the hard-signal classifier
in T26 or fired by the manual close button on the drawer — we run a
single-shot classifier per present witness that produces three signals
in one pass:

* ``summary`` — a 2-4 sentence per-POV recap of the scene from this
  witness's perspective. Different from omniscient narration; focuses on
  what the witness noticed/felt/remembers.
* ``knowledge_facts`` — concrete new things this witness learned about
  the user during the scene. Promoted to the directed edge's
  ``knowledge`` list via ``edge_update``.
* ``relationship_summary`` — a 1-2 sentence delta on how the
  witness's relationship to the user shifted in this scene. v1
  combines this with the prior edge summary by simple concatenation —
  the LLM is asked to phrase ``relationship_summary`` as a merge-ready
  fragment, so the result reads naturally without a second classifier
  round-trip.

In Phase 1 (single-bot) only the host bot is summarized; "you" doesn't
have a memory store in v1, so per-POV writes for the user are deferred. The
:func:`apply_scene_close_summary` driver is intentionally tolerant: if
no memories belong to the closed scene it silently skips the rewrite,
and a flapping classifier returns the empty default so the close flow
keeps moving.
"""

from __future__ import annotations

import json
import uuid
from datetime import datetime, timezone
from sqlite3 import Connection

from pydantic import BaseModel, Field

from chat.eventlog.log import append_and_apply
from chat.llm.classify import classify
from chat.llm.client import LLMClient


class ScenePOVSummary(BaseModel):
    """Structured classifier result for a single witness of a closing scene.

    Every field defaults to an empty value, so an instance built with no
    arguments is an inert no-op: applying it rewrites nothing. That lets
    callers consume the result unconditionally even when the model
    misbehaves.
    """

    # Per-POV recap of the scene from the witness's perspective.
    summary: str = Field(default="")
    # New things the witness learned about the user during the scene.
    knowledge_facts: list[str] = Field(default_factory=list)
    # Short, merge-ready note on how the relationship shifted.
    relationship_summary: str = Field(default="")


# System prompt for the per-POV scene classifier. ``{bot_name}`` is the
# only placeholder; :func:`summarize_scene` fills it with ``str.format``.
# The three fields requested here (summary, knowledge_facts,
# relationship_summary) match the fields of :class:`ScenePOVSummary`.
_SYSTEM_TEMPLATE = (
    "You are summarizing a roleplay scene from {bot_name}'s point of "
    "view. Read the dialogue, then output JSON with exactly three "
    "fields:\n"
    "- summary: 2-4 sentences, in {bot_name}'s POV, of what happened "
    "in the scene. This is NOT omniscient narration — focus on what "
    "{bot_name} noticed, felt, and would remember.\n"
    "- knowledge_facts: list of NEW factual things {bot_name} learned "
    "about the user during this scene. Use specific stated content; do "
    "not infer or interpret. Empty list is fine.\n"
    "- relationship_summary: a SHORT (1-2 sentence) summary of how "
    "{bot_name}'s relationship with the user changed or developed in "
    "this scene. Phrase it so it reads as a continuation of the prior "
    "summary; the caller will concatenate them.\n\n"
    "Be specific. Avoid generic phrases."
)


def _format_dialogue(dialogue: list[dict]) -> str:
    """Render dialogue turns as newline-separated ``speaker: text`` lines.

    A turn missing ``speaker`` is shown as ``?`` and a turn missing
    ``text`` contributes an empty string. An empty (or falsy) dialogue
    yields the placeholder ``"(no dialogue)"`` so the prompt never has a
    blank transcript section.
    """
    if dialogue:
        rendered: list[str] = []
        for entry in dialogue:
            who = entry.get("speaker", "?")
            said = entry.get("text", "")
            rendered.append(f"{who}: {said}")
        return "\n".join(rendered)
    return "(no dialogue)"


async def summarize_scene(
    client: LLMClient,
    *,
    model: str,
    bot_name: str,
    bot_persona: str,
    you_name: str,
    prior_edge_summary: str,
    dialogue: list[dict],
    timeout_s: float = 10.0,
) -> ScenePOVSummary:
    """Classify one witness's view of the scene into a ScenePOVSummary.

    The bot's name and persona are passed separately so the user prompt
    can address the model directly ("YOU are {bot_name}").
    ``prior_edge_summary`` is included so the classifier can phrase
    ``relationship_summary`` as an additive fragment; an empty persona or
    prior summary is replaced by a visible placeholder in the prompt.

    An empty :class:`ScenePOVSummary` is handed to ``classify`` as its
    ``default``, so a classifier failure is expected to come back as an
    inert result rather than an exception and the close pipeline keeps
    moving.
    """
    system_prompt = _SYSTEM_TEMPLATE.format(bot_name=bot_name)

    persona_text = bot_persona or "(no persona on file)"
    edge_text = prior_edge_summary or "(empty)"
    transcript = _format_dialogue(dialogue)
    user_prompt = (
        f"YOU are {bot_name}. {persona_text}\n"
        f"USER name: {you_name}\n"
        f"PRIOR EDGE SUMMARY ({bot_name} -> {you_name}): {edge_text}\n\n"
        f"DIALOGUE:\n{transcript}\n\n"
        f"Produce the JSON summary in {bot_name}'s POV."
    )

    result = await classify(
        client,
        model=model,
        system=system_prompt,
        user=user_prompt,
        schema=ScenePOVSummary,
        default=ScenePOVSummary(),
        timeout_s=timeout_s,
    )
    return result


def _read_recent_dialogue(
    conn: Connection,
    chat_id: str,
    *,
    limit: int = 50,
    since_event_id: int | None = None,
) -> list[dict]:
    """Pull the last ``limit`` user/assistant turns for ``chat_id``.

    Phase 1 ``user_turn`` / ``assistant_turn`` events don't carry a
    ``scene_id``, so we approximate the scene's transcript by taking
    the most recent turns of the chat. Superseded and hidden rows are
    filtered out so regenerated turns (T29) don't bleed into the
    summary.

    T80.2: ``since_event_id`` clamps the result to event_log rows whose
    ``id >= since_event_id`` so callers needing a scene-scoped view (e.g.
    thread detection on close) don't pull turns that landed before the
    closing scene's ``scene_opened`` event.
    """
    if since_event_id is None:
        cur = conn.execute(
            "SELECT kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'assistant_turn') "
            "  AND superseded_by IS NULL AND hidden = 0 "
            "ORDER BY id DESC LIMIT ?",
            (limit,),
        )
    else:
        cur = conn.execute(
            "SELECT kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'assistant_turn') "
            "  AND superseded_by IS NULL AND hidden = 0 "
            "  AND id >= ? "
            "ORDER BY id DESC LIMIT ?",
            (since_event_id, limit),
        )
    rows = list(reversed(cur.fetchall()))
    out: list[dict] = []
    for kind, payload_json in rows:
        p = json.loads(payload_json)
        if p.get("chat_id") != chat_id:
            continue
        if kind == "user_turn":
            out.append({"speaker": "you", "text": p.get("prose", "")})
        else:
            out.append(
                {
                    "speaker": p.get("speaker_id", "bot"),
                    "text": p.get("text", ""),
                }
            )
    return out


def _scene_opened_event_id(
    conn: Connection, chat_id: str, scene_id: int
) -> int | None:
    """Return the event_log id of the ``scene_opened`` (or
    ``meanwhile_scene_started``) event that created scene row
    ``scene_id``. Used by T80.2 to lower-bound dialogue reads to a
    single scene's transcript.

    ``meanwhile_scene_started`` carries an explicit ``scene_id`` so we
    match on that directly. ``scene_opened`` doesn't, so we walk the
    chat's scene rows in id order and zip against the chat's scene-open
    events in id order — the projector creates one scene row per
    scene-open event, so positions correspond.

    Returns ``None`` when no matching event is found; callers should
    treat that as "fall back to chat-wide" rather than over-filter.
    """
    # Fast path for meanwhile children (explicit scene_id in payload).
    for ev_id, payload_json in conn.execute(
        "SELECT id, payload_json FROM event_log "
        "WHERE kind = 'meanwhile_scene_started' "
        "  AND superseded_by IS NULL AND hidden = 0",
    ).fetchall():
        try:
            p = json.loads(payload_json)
        except (TypeError, ValueError):
            continue
        if p.get("chat_id") == chat_id and p.get("scene_id") == scene_id:
            return ev_id
    # Fallback for parent you-scenes: zip chat-scoped scene-open events
    # against chat-scoped scene rows in id order.
    chat_scene_ids = [
        r[0]
        for r in conn.execute(
            "SELECT id FROM scenes WHERE chat_id = ? ORDER BY id ASC",
            (chat_id,),
        ).fetchall()
    ]
    if scene_id not in chat_scene_ids:
        return None
    chat_open_evs: list[int] = []
    for ev_id, _kind, payload_json in conn.execute(
        "SELECT id, kind, payload_json FROM event_log "
        "WHERE kind IN ('scene_opened', 'meanwhile_scene_started') "
        "  AND superseded_by IS NULL AND hidden = 0 "
        "ORDER BY id ASC",
    ).fetchall():
        try:
            p = json.loads(payload_json)
        except (TypeError, ValueError):
            continue
        if p.get("chat_id") == chat_id:
            chat_open_evs.append(ev_id)
    idx = chat_scene_ids.index(scene_id)
    if idx < len(chat_open_evs):
        return chat_open_evs[idx]
    return None


async def _summarize_and_apply_for_witness(
    conn: Connection,
    client: LLMClient,
    *,
    classifier_model: str,
    chat_id: str,
    scene_id: int,
    bot_id: str,
    you_name: str,
    dialogue: list[dict],
    timeout_s: float,
    key_quotes_suffix: str = "",
) -> ScenePOVSummary:
    """Summarize the scene for one bot witness and persist the results.

    Runs :func:`summarize_scene` with the witness's persona and its
    current ``bot -> you`` edge summary, then emits up to three kinds of
    events, in this order:

    1. one ``manual_edit`` (``memory_pov_summary``) per memory row owned
       by ``bot_id`` in ``scene_id``, replacing its ``pov_summary`` with
       the classifier summary plus ``key_quotes_suffix``;
    2. one ``manual_edit`` (``edge_summary``) extending the prior edge
       summary with the classifier's ``relationship_summary``;
    3. one ``edge_update`` carrying the new ``knowledge_facts``.

    Each step is skipped when its input is missing: an empty summary
    leaves memories untouched, a missing edge row or empty
    ``relationship_summary`` leaves the edge summary untouched, and an
    empty fact list emits no ``edge_update``. An unknown bot falls back
    to using ``bot_id`` as its name with an empty persona.

    ``key_quotes_suffix`` is appended verbatim (T58.1); the empty string
    is the no-op default for low-significance scenes.

    Returns the classifier result for this witness.
    """
    from chat.state.edges import get_edge
    from chat.state.entities import get_bot

    witness = get_bot(conn, bot_id) or {"name": bot_id, "persona": ""}

    bot_to_you_edge = get_edge(conn, bot_id, "you")
    edge_fields = bot_to_you_edge if bot_to_you_edge else {}
    prior_summary = edge_fields.get("summary", "") or ""

    pov = await summarize_scene(
        client,
        model=classifier_model,
        bot_name=witness.get("name", bot_id),
        bot_persona=witness.get("persona", "") or "",
        you_name=you_name,
        prior_edge_summary=prior_summary,
        dialogue=dialogue,
        timeout_s=timeout_s,
    )

    # --- 1. Memory rewrites -------------------------------------------
    memory_rows = conn.execute(
        "SELECT id, pov_summary FROM memories "
        "WHERE scene_id = ? AND owner_id = ?",
        (scene_id, bot_id),
    ).fetchall()
    if pov.summary:
        # T80.1: make sure the stored text carries exactly one Key quotes
        # suffix — drop any suffix already present in the summary text
        # before appending the fresh one. The value is the same for every
        # row, so it is computed once.
        rewritten = _strip_key_quotes_suffix(pov.summary) + key_quotes_suffix
        for memory_id, previous_text in memory_rows:
            append_and_apply(
                conn,
                kind="manual_edit",
                payload={
                    "target_kind": "memory_pov_summary",
                    "target_id": int(memory_id),
                    "prior_value": previous_text,
                    "new_value": rewritten,
                },
            )
    # An empty summary (the classifier default) leaves the seeded
    # per-turn pov_summary values in place.

    # --- 2. Edge summary ----------------------------------------------
    relationship_delta = pov.relationship_summary
    if bot_to_you_edge is not None and relationship_delta:
        if prior_summary:
            combined = f"{prior_summary} {relationship_delta}".strip()
        else:
            combined = relationship_delta
        append_and_apply(
            conn,
            kind="manual_edit",
            payload={
                "target_kind": "edge_summary",
                "target_id": {
                    "source_id": bot_id,
                    "target_id": "you",
                },
                "prior_value": prior_summary,
                "new_value": combined,
            },
        )

    # --- 3. Knowledge facts -------------------------------------------
    if pov.knowledge_facts:
        append_and_apply(
            conn,
            kind="edge_update",
            payload={
                "source_id": bot_id,
                "target_id": "you",
                "chat_id": chat_id,
                "knowledge_facts": list(pov.knowledge_facts),
            },
        )

    return pov


# T80.1: header marker shared by the suffix builder and the
# witness-write strip step. Everything from the first occurrence of this
# marker onward is treated as a previously-appended Key quotes suffix and
# stripped before reuse so repeated scene closes don't compose recursive
# bloat.
_KEY_QUOTES_HEADER = "\n\nKey quotes:\n"


def _strip_key_quotes_suffix(text: str) -> str:
    """Remove a previously-appended Key quotes suffix from ``text``.

    Returns ``text`` unchanged when it is falsy or the marker is absent;
    otherwise returns the prefix up to (but not including) the first
    occurrence of the marker. Used in two places: (1) when sourcing
    quote text from a memory row that may already carry the suffix from
    a prior close, and (2) on the fresh per-POV summary text right
    before the new suffix is appended, so the stored value never carries
    more than one suffix.
    """
    if not text:
        return text
    idx = text.find(_KEY_QUOTES_HEADER)
    if idx >= 0:
        return text[:idx]
    return text


def _build_key_quotes_suffix(conn: Connection, scene_id: int) -> str:
    """Build the "Key quotes:" suffix for a closing scene.

    If the scene's max-turn-significance is >= 2, the suffix quotes the
    top-3 highest-significance memory rows (per requirements §11.1;
    ties broken by lowest id). Otherwise the empty string is returned so
    the per-POV summaries collapse fully (low-significance scenes lose
    all raw text in favor of the classifier rewrite).

    Quote source is each memory's current ``pov_summary`` — this helper
    is meant to be called BEFORE the per-POV rewrite replaces that text.
    Texts are truncated to 200 chars to bound memory row growth across
    many witnesses.

    T80.1: candidate text is run through :func:`_strip_key_quotes_suffix`
    first so a re-close (whose source memories already carry a suffix from
    the prior close) doesn't quote a quote.

    Rows whose text is NULL, blank, or nothing but a stale suffix are
    dropped rather than rendered as an empty ``- ""`` line; if no quote
    survives, the empty string is returned. The header is taken from
    ``_KEY_QUOTES_HEADER`` so the builder and the stripper cannot drift
    apart.
    """
    row = conn.execute(
        "SELECT MAX(significance) FROM memories WHERE scene_id = ?",
        (scene_id,),
    ).fetchone()
    max_sig = (row[0] if row else None) or 0
    if max_sig < 2:
        return ""
    cur = conn.execute(
        "SELECT pov_summary FROM memories WHERE scene_id = ? "
        "ORDER BY significance DESC, id ASC LIMIT 3",
        (scene_id,),
    )
    quotes: list[str] = []
    for (raw_text,) in cur.fetchall():
        quote = _strip_key_quotes_suffix(raw_text or "")[:200]
        if quote.strip():
            quotes.append(quote)
    if not quotes:
        return ""
    lines = "\n".join(f'- "{q}"' for q in quotes)
    return f"{_KEY_QUOTES_HEADER}{lines}"


async def apply_scene_close_summary(
    conn: Connection,
    client: LLMClient,
    *,
    classifier_model: str,
    chat_id: str,
    scene_id: int,
    host_bot_id: str,
    timeout_s: float = 10.0,
) -> ScenePOVSummary:
    """Drive the per-POV summary pipeline after ``scene_closed``.

    Phase 1 (single-bot) behavior — the host bot is summarized once and
    the result drives memory + edge rewrites — is preserved exactly when
    the chat has no guest. T45 extends this to fan out across each
    present bot witness when a guest is also in the room:

      1. Gather dialogue from the event_log. For the per-POV summaries
         this is the chat's most recent turns (see
         :func:`_read_recent_dialogue`), not a strictly scene-scoped
         transcript.
      2. Build the "Key quotes:" suffix from the scene's memory rows
         before any rewrite touches them (T58.1).
      3. For each present witness (host, then guest if the chat has a
         ``guest_bot_id``), run :func:`summarize_scene` once with that
         witness's persona and their own prior ``bot -> you`` edge
         summary, and independently:
         a. Rewrite each scene-bound memory's ``pov_summary`` via
            ``manual_edit`` (target_kind ``memory_pov_summary``).
         b. Update that witness's ``bot -> you`` edge summary via
            ``manual_edit`` (target_kind ``edge_summary``). v2 combines
            prior + classifier ``relationship_summary`` by simple
            concatenation.
         c. Append any ``knowledge_facts`` to the same edge via
            ``edge_update``.
      4. If the guest was summarized and a ``group_node`` row exists for
         this chat, merge the two per-POV summaries with
         :func:`merge_group_summary` (T70) and append a
         ``group_node_updated`` event carrying the merged ``summary``
         and ``dynamic``. The merge helper falls back to a naive
         ``"{name}: {summary}"`` concat on classifier failure.
      5. If the closing scene is a meanwhile child (``present_set_kind``
         is ``"host_guest"``), run :func:`summarize_scene` with a
         narrator persona and, when it yields a non-empty summary,
         append a ``meanwhile_digest_created`` event (T65).
      6. Run thread detection (T58.2) against a scene-scoped transcript
         when the scene's opening event can be located, otherwise
         against the dialogue from step 1, and append
         ``thread_opened`` / ``thread_updated`` / ``thread_closed``
         events for the returned candidates. Any exception raised by
         detection is swallowed and treated as "no candidates".

    The host's :class:`ScenePOVSummary` is returned to preserve the
    Phase 1 callers' contract.
    """
    # Local imports to keep the module-level surface tight and avoid
    # any chance of a circular dep through chat.state.*.
    from chat.services.thread_detection import detect_threads
    from chat.state.entities import get_bot, get_you
    from chat.state.group_node import get_group_node
    from chat.state.threads import list_open_threads
    from chat.state.world import get_chat, get_scene

    you_entity = get_you(conn) or {"name": "you", "persona": ""}
    you_name = you_entity.get("name", "you") or "you"

    chat = get_chat(conn, chat_id) or {}
    guest_bot_id = chat.get("guest_bot_id")

    # T65: detect meanwhile child scenes via the migration-0011
    # ``present_set_kind`` column. The mechanism is a single field read
    # — meanwhile scenes carry ``"host_guest"``, regular you-scenes
    # carry the default ``"you_host"``. We read this once up front so
    # both the dialogue source and the post-summary digest emission
    # branches can reference it.
    closing_scene = get_scene(conn, scene_id) or {}
    is_meanwhile = closing_scene.get("present_set_kind") == "host_guest"

    # Chat-wide recent turns (default limit); shared by every witness and
    # by the meanwhile digest below.
    dialogue = _read_recent_dialogue(conn, chat_id)

    # T58.1: build the "Key quotes:" suffix BEFORE the per-POV rewrites
    # land — quote source is the raw seeded pov_summary text on each
    # memory row, which the rewrite about to fire would clobber.
    key_quotes_suffix = _build_key_quotes_suffix(conn, scene_id)

    host_pov = await _summarize_and_apply_for_witness(
        conn,
        client,
        classifier_model=classifier_model,
        chat_id=chat_id,
        scene_id=scene_id,
        bot_id=host_bot_id,
        you_name=you_name,
        dialogue=dialogue,
        timeout_s=timeout_s,
        key_quotes_suffix=key_quotes_suffix,
    )

    # The guest is summarized only when the chat row names one; the same
    # dialogue and Key quotes suffix are reused.
    guest_pov: ScenePOVSummary | None = None
    if guest_bot_id is not None:
        guest_pov = await _summarize_and_apply_for_witness(
            conn,
            client,
            classifier_model=classifier_model,
            chat_id=chat_id,
            scene_id=scene_id,
            bot_id=guest_bot_id,
            you_name=you_name,
            dialogue=dialogue,
            timeout_s=timeout_s,
            key_quotes_suffix=key_quotes_suffix,
        )

    # Group node update: T70 runs a third classifier call to merge the
    # two per-POV summaries into a coherent group-level view + a brief
    # group-dynamic note. Falls back to the Phase 2 naive concat on
    # classifier failure (see :func:`merge_group_summary`). Only fires
    # when both POVs ran (i.e. the guest is present) and a group_node
    # row exists for this chat.
    if guest_pov is not None and get_group_node(conn, chat_id) is not None:
        host_bot = get_bot(conn, host_bot_id) or {"name": host_bot_id}
        guest_bot = get_bot(conn, guest_bot_id) or {"name": guest_bot_id}
        # Fall back to the raw id when a bot row has no usable name.
        host_name = host_bot.get("name", host_bot_id) or host_bot_id
        guest_name = guest_bot.get("name", guest_bot_id) or guest_bot_id
        merged = await merge_group_summary(
            client,
            classifier_model=classifier_model,
            host_name=host_name,
            host_pov_summary=host_pov.summary,
            guest_name=guest_name,
            guest_pov_summary=guest_pov.summary,
            timeout_s=timeout_s,
        )
        append_and_apply(
            conn,
            kind="group_node_updated",
            payload={
                "chat_id": chat_id,
                "summary": merged.summary,
                "dynamic": merged.dynamic,
            },
        )

    # T65: when the closing scene was a meanwhile child (host_guest
    # present set), generate an omniscient briefing for the absent
    # "you" and queue it as a pending digest. We reuse summarize_scene
    # with a narrator persona so the digest text is shaped by the same
    # classifier — only the ``summary`` field is consumed downstream.
    # Emitted AFTER per-POV summaries land so witness memories carry
    # their own POV text first; this mirrors how group_node_updated is
    # ordered relative to the per-POV writes above.
    if is_meanwhile:
        digest_pov = await summarize_scene(
            client,
            model=classifier_model,
            bot_name="Narrator",
            bot_persona=_MEANWHILE_DIGEST_PERSONA,
            you_name=you_name,
            prior_edge_summary="",
            dialogue=dialogue,
            timeout_s=timeout_s,
        )
        # An empty summary (the classifier default) queues no digest.
        if digest_pov.summary:
            append_and_apply(
                conn,
                kind="meanwhile_digest_created",
                payload={
                    "chat_id": chat_id,
                    "scene_id": scene_id,
                    "summary": digest_pov.summary,
                },
            )

    # T58.2: thread detection on close. Failure-tolerant: classify()
    # returns the empty default on retry-exhaustion, and the broad except
    # below protects the close pipeline from any other classifier/mock
    # flap.
    #
    # T80.2: thread detection runs against a SCENE-SCOPED transcript,
    # not the chat-wide last-50 turns used by the per-POV summaries.
    # Mis-attributing threads when scene boundaries fall inside the last
    # 50 turns would otherwise close threads opened in a prior scene.
    scene_open_ev_id = _scene_opened_event_id(conn, chat_id, scene_id)
    if scene_open_ev_id is not None:
        scene_dialogue = _read_recent_dialogue(
            conn, chat_id, since_event_id=scene_open_ev_id
        )
    else:
        # No opening event located: fall back to the chat-wide dialogue.
        scene_dialogue = dialogue
    try:
        thread_result = await detect_threads(
            client,
            classifier_model=classifier_model,
            scene_transcript=scene_dialogue,
            open_threads=list_open_threads(conn, chat_id),
            timeout_s=timeout_s,
        )
    except Exception:
        # NOTE(review): the failure is swallowed without logging; an
        # empty result means no thread events are emitted below.
        from chat.services.thread_detection import ThreadDetectionResult

        thread_result = ThreadDetectionResult()
    for cand in thread_result.candidates:
        if cand.action == "open":
            # New threads get a locally generated 12-hex-char id.
            new_thread_id = f"thr_{uuid.uuid4().hex[:12]}"
            append_and_apply(
                conn,
                kind="thread_opened",
                payload={
                    "thread_id": new_thread_id,
                    "chat_id": chat_id,
                    "title": cand.title,
                    "summary": cand.summary,
                },
            )
        elif cand.action == "update" and cand.existing_thread_id:
            append_and_apply(
                conn,
                kind="thread_updated",
                payload={
                    "thread_id": cand.existing_thread_id,
                    "summary": cand.summary,
                    "last_referenced_scene_id": scene_id,
                },
            )
        elif cand.action == "close" and cand.existing_thread_id:
            append_and_apply(
                conn,
                kind="thread_closed",
                payload={
                    "thread_id": cand.existing_thread_id,
                    "closed_at": datetime.now(timezone.utc).isoformat(),
                },
            )
        # Candidates with any other action, or update/close candidates
        # without an existing_thread_id, are ignored.

    return host_pov


class GroupMetaSummary(BaseModel):
    """Structured classifier result: the group-level view of a closed scene.

    Both fields default to the empty string, so a bare instance can act
    as a sentinel. In practice :func:`merge_group_summary` does not hand
    these defaults back on classifier failure; it builds an explicit
    naive-concat fallback instead so existing Phase 2 behavior is
    preserved.
    """

    # Merged summary of the scene across both witnesses.
    summary: str = Field(default="")
    # Brief note on the group dynamic during the scene.
    dynamic: str = Field(default="")


# System prompt for the group-level merge classifier. Passed as the
# ``system`` argument by :func:`merge_group_summary`; it has no
# placeholders, and the two per-POV summaries travel in the user message.
# The requested output (a summary plus a 'dynamic' note) matches the
# fields of :class:`GroupMetaSummary`.
_GROUP_MERGE_SYSTEM = (
    "Given two per-POV scene summaries from a 3-entity scene (you + "
    "host + guest), produce a coherent group-level summary capturing "
    "the shared events as both witnesses experienced them, plus a "
    "brief 'dynamic' note describing the trio's group dynamic during "
    "the scene. Output strict JSON matching schema."
)


# T65: meanwhile-scene digest. The "you" player was absent during this
# scene; the digest is a short neutral briefing they'll read on the next
# you-scene resume. Reuses the ScenePOVSummary schema so the same
# `summarize_scene` helper can be called with a different persona — only
# the ``summary`` field is used downstream.
#
# This text is passed as ``bot_persona`` (with ``bot_name="Narrator"``)
# by `apply_scene_close_summary`, so it lands in the user prompt right
# after "YOU are Narrator. ".
_MEANWHILE_DIGEST_PERSONA = (
    "an omniscient narrator briefing the absent player in 2-3 neutral "
    "sentences on what happened while they were away — no editorializing, "
    "no second-person address"
)


async def merge_group_summary(
    client: LLMClient,
    *,
    classifier_model: str,
    host_name: str,
    host_pov_summary: str,
    guest_name: str,
    guest_pov_summary: str,
    timeout_s: float = 30.0,
) -> GroupMetaSummary:
    """Combine the host's and guest's POV summaries into one group view.

    The two summaries are sent to the classifier, labelled by witness
    name and role, and the result is returned as a
    :class:`GroupMetaSummary` (merged ``summary`` plus a ``dynamic``
    note).

    The ``default`` handed to ``classify`` is the naive concat
    ``"{host}: ...\\n\\n{guest}: ..."`` with an empty ``dynamic``, so a
    classifier failure degrades to the pre-merge behavior instead of an
    empty summary.
    """
    prompt_sections = [
        f"{host_name} (host) POV summary:\n{host_pov_summary}",
        f"{guest_name} (guest) POV summary:\n{guest_pov_summary}",
    ]
    user_prompt = "\n\n".join(prompt_sections)

    naive_concat = "\n\n".join(
        [
            f"{host_name}: {host_pov_summary}",
            f"{guest_name}: {guest_pov_summary}",
        ]
    )
    on_failure = GroupMetaSummary(summary=naive_concat, dynamic="")

    merged = await classify(
        client,
        model=classifier_model,
        system=_GROUP_MERGE_SYSTEM,
        user=user_prompt,
        schema=GroupMetaSummary,
        default=on_failure,
        timeout_s=timeout_s,
    )
    return merged