# chat/chat/services/memory_write.py

"""Per-turn memory writes (T21).

After ``assistant_turn`` lands, the turn flow records a ``memory_written``
event for each present POV owner. Phase 1 single-bot turns only have the
host bot as a memory-store owner — ``you`` doesn't have a memory store in
v1 — so we write exactly one row per turn.

Phase 1 simplifications (per plan §11.1, T27 will refine):

- ``pov_summary`` is the assistant's raw narrative text. T27 rewrites at
  scene close into per-POV summary form.
- ``significance`` defaults to ``1`` (Notable). T22's async significance
  pass overwrites via a follow-up event.
- Witness flags were hard-coded ``[you=1, host=1, guest=0]`` in Phase 1.
  They are now derived from ``guest_bot_id`` and ``you_present`` (see
  :func:`record_turn_memory_for_present`).

T97 (Phase 4): each successful memory write also enqueues an
:class:`~chat.services.embedding_worker.EmbeddingJob` on the
lifespan-managed embedding worker, so the just-written memory gets a
vector indexed out-of-band. The hook is opt-in via the ``app`` kwarg —
callers without a FastAPI app handle (e.g. one-off scripts, isolated
unit tests) simply don't enqueue, and the backfill script can pick up
those rows later.
"""

from __future__ import annotations

from sqlite3 import Connection

from chat.eventlog.log import append_and_apply
from chat.services.embedding_worker import EmbeddingJob


def _latest_memory_id(
    conn: Connection, owner_id: str, chat_id: str
) -> int | None:
    """Return the highest ``memories.id`` for ``(owner_id, chat_id)``.

    Returns ``None`` when the owner has no memory rows in that chat.
    """
    row = conn.execute(
        "SELECT id FROM memories "
        "WHERE owner_id = ? AND chat_id = ? "
        "ORDER BY id DESC LIMIT 1",
        (owner_id, chat_id),
    ).fetchone()
    return row[0] if row else None


def _write_one_memory(
    conn: Connection,
    *,
    owner_id: str,
    chat_id: str,
    narrative_text: str,
    witness_you: int,
    witness_host: int,
    witness_guest: int,
    scene_id: int | None,
    chat_clock_at: str | None,
    source: str,
    significance: int,
    app=None,
) -> tuple[int, int | None]:
    """Append a single ``memory_written`` event for ``owner_id`` and return
    ``(event_id, memory_id)`` for the projected row.

    The event payload always carries the owner, chat, summary text, the
    three witness flags, ``source``, ``significance``, a fixed
    ``reliability`` of ``1.0`` and cleared ``pinned`` / ``auto_pinned``
    flags. ``scene_id`` and ``chat_clock_at`` are only included when they
    are not ``None``.

    ``memory_id`` is the id of the ``memories`` row that appeared for
    ``(owner_id, chat_id)`` as a result of this write. It is ``None`` when
    no newer row than the pre-write latest can be found, so a row left over
    from an earlier turn is never mistaken for the one just written.

    When ``app`` is provided and ``app.state.embedding_worker`` exists,
    enqueue an :class:`EmbeddingJob` for the freshly-projected memory id
    (T97). Skipped silently if ``app`` (or its ``state`` / worker) is
    absent, if the projected row can't be located, or if the text is
    empty or whitespace-only — the backfill script handles missing-vector
    rows.
    """
    payload: dict = {
        "owner_id": owner_id,
        "chat_id": chat_id,
        "pov_summary": narrative_text,
        "witness_you": witness_you,
        "witness_host": witness_host,
        "witness_guest": witness_guest,
        "source": source,
        "reliability": 1.0,
        "significance": significance,
        "pinned": 0,
        "auto_pinned": 0,
    }
    if scene_id is not None:
        payload["scene_id"] = scene_id
    if chat_clock_at is not None:
        payload["chat_clock_at"] = chat_clock_at

    # Remember the newest row *before* the write so the post-write lookup
    # can tell a freshly projected row from a stale one.
    # NOTE(review): relies on ``memories.id`` growing with each insert —
    # the same assumption the ``ORDER BY id DESC`` lookup already makes.
    previous_id = _latest_memory_id(conn, owner_id, chat_id)

    event_id = append_and_apply(conn, kind="memory_written", payload=payload)

    latest_id = _latest_memory_id(conn, owner_id, chat_id)
    is_new_row = latest_id is not None and (
        previous_id is None or latest_id > previous_id
    )
    memory_id = latest_id if is_new_row else None

    # T97: enqueue an embedding job for the just-written memory. The
    # worker drains the queue out-of-band and emits an
    # ``embedding_indexed`` event when the vector is ready. The nested
    # ``getattr`` keeps this a no-op for callers without a wired-up app
    # (scripts, tests, stubs lacking ``state``) — the backfill script
    # handles those rows.
    if memory_id is not None and narrative_text and narrative_text.strip():
        state = getattr(app, "state", None)
        worker = getattr(state, "embedding_worker", None)
        if worker is not None:
            worker.enqueue(
                EmbeddingJob(memory_id=memory_id, text=narrative_text)
            )

    return event_id, memory_id


def record_turn_memory_for_present(
    conn: Connection,
    *,
    chat_id: str,
    host_bot_id: str,
    guest_bot_id: str | None,
    narrative_text: str,
    scene_id: int | None = None,
    chat_clock_at: str | None = None,
    source: str = "direct",
    significance: int = 1,
    you_present: bool = True,
    app=None,
) -> dict[str, tuple[int, int | None]]:
    """Write the per-turn memory for every bot present in the scene (T84).

    One ``memory_written`` event is recorded per bot witness: the host
    always gets one, and the guest gets one whenever ``guest_bot_id`` is
    not ``None``. Every write in a call shares the same witness mask:

    - ``witness_you`` is ``1`` when ``you_present`` is truthy, else ``0``.
    - ``witness_host`` is always ``1``.
    - ``witness_guest`` is ``1`` when a guest id was supplied, else ``0``.

    A scene without the user (``you_present=False``, the Phase 3 meanwhile
    case) must have both bots, so omitting ``guest_bot_id`` there is
    treated as a programming error.

    If ``app`` is given, it is forwarded to each write so an
    :class:`EmbeddingJob` can be enqueued on ``app.state.embedding_worker``
    (T97).

    Returns:
        ``{bot_id: (event_id, memory_id)}`` for each owner written, host
        first, so callers can pick up the projected memory id without
        another query.

    Raises:
        ValueError: ``you_present`` is false and ``guest_bot_id`` is
            ``None``.
    """
    guest_present = guest_bot_id is not None
    if not (you_present or guest_present):
        raise ValueError("you_present=False requires guest_bot_id")

    # Host is written first, then the guest (if any) — order matters for
    # the sequence of appended events.
    owners = [host_bot_id]
    if guest_present:
        owners.append(guest_bot_id)

    # Everything except the owner is identical across the writes.
    shared_kwargs = {
        "chat_id": chat_id,
        "narrative_text": narrative_text,
        "witness_you": int(bool(you_present)),
        "witness_host": 1,
        "witness_guest": int(guest_present),
        "scene_id": scene_id,
        "chat_clock_at": chat_clock_at,
        "source": source,
        "significance": significance,
        "app": app,
    }

    written: dict[str, tuple[int, int | None]] = {}
    for owner in owners:
        written[owner] = _write_one_memory(
            conn, owner_id=owner, **shared_kwargs
        )
    return written


def record_meanwhile_memory(
    conn: Connection,
    *,
    chat_id: str,
    host_bot_id: str,
    guest_bot_id: str,
    narrative_text: str,
    scene_id: int | None = None,
    chat_clock_at: str | None = None,
    source: str = "direct",
    significance: int = 1,
    app=None,
) -> dict[str, tuple[int, int | None]]:
    """Record memories for a meanwhile scene (T64, T84).

    Compatibility shim: forwards every argument unchanged to
    :func:`record_turn_memory_for_present` and pins ``you_present=False``.
    It exists so the call sites in :mod:`chat.web.meanwhile` keep working;
    new code should call the unified entry-point itself.

    Returns whatever :func:`record_turn_memory_for_present` returns — a
    ``{bot_id: (event_id, memory_id)}`` mapping.
    """
    forwarded = {
        "chat_id": chat_id,
        "host_bot_id": host_bot_id,
        "guest_bot_id": guest_bot_id,
        "narrative_text": narrative_text,
        "scene_id": scene_id,
        "chat_clock_at": chat_clock_at,
        "source": source,
        "significance": significance,
        "app": app,
    }
    # The user is never a witness in a meanwhile scene.
    return record_turn_memory_for_present(
        conn, you_present=False, **forwarded
    )