# chat/chat/web/search.py

"""T100 (Phase 4): cross-chat search UX route.

Wraps T93's :func:`chat.services.cross_chat_search.search_all_memories`
in a small read-only HTML surface so the top-bar search input has
somewhere to land. The route does no filtering of its own beyond the
empty-query fast-path that T93 already implements; ranking, owner
scope, and witness scope all live in the service layer.

For each match we hydrate just enough metadata to render a row:
* the owner bot's display name (so users see "BOTA" not "bot_a"),
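* the originating ``chat_id`` plus the memory's ``event_id`` (the link
  target — the route passes ``event_id`` through so the template can
  deep-link to the originating turn; it may be ``None`` on older
  memory rows, in which case the template falls back to linking the
  chat as a whole),
* a scene label when a scene is set (the scene's ``started_at``
  timestamp, because scenes have no ``title`` column today),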
* and the ``pov_summary`` itself.

T106 (Phase 4.5): hydration is batched. Pre-T106 the route called
``get_bot``/``get_chat``/``get_scene`` once per result row — N+1 with
``DEFAULT_SEARCH_K=50`` meaning up to 150 individual SELECTs per page
load. We now collect distinct ids first and fan-in via three
``WHERE id IN (...)`` queries, then map back per row.

We deliberately keep this module synchronous and template-only — no
HTMX swaps, no JSON API — because the search box is a "leave the
current chat to look something up" surface, not an inline drawer.
"""

from __future__ import annotations

import json
from pathlib import Path
from sqlite3 import Connection

from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from chat.services.cross_chat_search import search_all_memories
from chat.state.entities import get_bot
from chat.state.world import get_chat, get_scene
from chat.web.bots import get_conn

# Template loader pointed at the ``templates`` directory that lives two
# levels above this module's resolved location.
TEMPLATES = Jinja2Templates(
    directory=str(Path(__file__).resolve().parents[1] / "templates")
)

#: Upper bound on the number of cross-chat FTS matches rendered by a
#: single ``/search`` page load. Kept at module level (T106) so it can
#: be tuned without editing the route body. ``search_all_memories``
#: ships with a smaller default (``k=20``); the route overrides it
#: because the top-bar search is meant to scan everything the user has
#: seen rather than feed an inline drawer.
DEFAULT_SEARCH_K = 50

# Router that the ``/search`` endpoint below is registered on.
router = APIRouter()


def _fetch_bots_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Batched sibling of :func:`chat.state.entities.get_bot`.

    Inlined here (not exported from ``state.entities``) to keep T106's
    scope confined to ``search.py`` per the Phase 4.5 plan.

    Args:
        conn: Open SQLite connection to read the ``bots`` table from.
        ids: Bot ids to look up.

    Returns:
        ``{bot_id: bot_dict}`` for every id in ``ids`` that has a row.
        Each dict carries every ``bots`` column, except that
        ``voice_samples_json`` / ``traits_json`` are replaced by their
        JSON-decoded values under ``voice_samples`` / ``traits``. Ids
        with no matching row are simply absent from the map (the caller
        falls back to the raw id string).

    Empty ``ids`` short-circuits to ``{}`` because SQLite rejects
    ``WHERE id IN ()`` as a syntax error.
    """
    if not ids:
        return {}
    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM bots WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Take the column names from the SELECT's own cursor rather than a
    # separate ``PRAGMA table_info`` round trip: the batching exists to
    # cut the query count, so this helper should cost exactly one query.
    # ``description`` is populated even when no rows match.
    cols = [column[0] for column in cursor.description]
    out: dict[str, dict] = {}
    for row in cursor:
        d = dict(zip(cols, row))
        d["voice_samples"] = json.loads(d.pop("voice_samples_json"))
        d["traits"] = json.loads(d.pop("traits_json"))
        out[d["id"]] = d
    return out


def _fetch_chats_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Load several chats (joined with their state row) in one query.

    Batched counterpart of :func:`chat.state.world.get_chat`: the same
    ``chats`` / ``chat_state`` JOIN, so each value exposes ``id``,
    ``host_bot_id``, ``guest_bot_id``, ``created_at``, ``time``,
    ``weather``, ``active_scene_id`` and ``narrative_anchor``.

    Args:
        conn: Open SQLite connection.
        ids: Chat ids to look up.

    Returns:
        ``{chat_id: chat_dict}`` for every requested chat that has both
        a ``chats`` row and a ``chat_state`` row (inner join). An empty
        ``ids`` yields ``{}`` without touching the database, which also
        avoids SQLite's ``IN ()`` syntax error.
    """
    if not ids:
        return {}

    # Output keys, listed in the same order as the SELECT columns below.
    field_names = (
        "id",
        "host_bot_id",
        "guest_bot_id",
        "created_at",
        "time",
        "weather",
        "active_scene_id",
        "narrative_anchor",
    )
    marks = ",".join("?" for _ in ids)
    sql = (
        "SELECT c.id, c.host_bot_id, c.guest_bot_id, c.created_at, "
        "       s.time, s.weather, s.active_scene_id, s.narrative_anchor "
        "FROM chats c JOIN chat_state s ON s.chat_id = c.id "
        f"WHERE c.id IN ({marks})"
    )

    chats: dict[str, dict] = {}
    for record in conn.execute(sql, tuple(ids)).fetchall():
        chats[record[0]] = dict(zip(field_names, record))
    return chats


def _fetch_scenes_by_ids(conn: Connection, ids: set[int]) -> dict[int, dict]:
    """Batched sibling of :func:`chat.state.world.get_scene`.

    Args:
        conn: Open SQLite connection to read the ``scenes`` table from.
        ids: Scene ids to look up.

    Returns:
        ``{scene_id: scene_dict}`` for every id in ``ids`` that has a
        row. Each dict carries every ``scenes`` column, except that
        ``participants_json`` is replaced by its JSON-decoded value
        under ``participants`` so callers see the same shape as the
        per-row helper. Ids without a row are absent from the map.

    Empty ``ids`` returns ``{}`` without querying, which also avoids
    SQLite's ``WHERE id IN ()`` syntax error.
    """
    if not ids:
        return {}
    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM scenes WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Column names come from the SELECT's own cursor metadata, so the
    # helper issues a single query instead of an extra
    # ``PRAGMA table_info`` lookup per call. ``description`` is set even
    # when the result set is empty.
    cols = [column[0] for column in cursor.description]
    out: dict[int, dict] = {}
    for row in cursor:
        d = dict(zip(cols, row))
        d["participants"] = json.loads(d.pop("participants_json"))
        out[d["id"]] = d
    return out


@router.get("/search", response_class=HTMLResponse)
async def search(request: Request, q: str = "", conn=Depends(get_conn)):
    """Serve the cross-chat search page for query ``q``.

    An empty ``q`` is accepted on purpose: the top-bar form submits to
    this URL even when the input is blank, so that case skips the
    service call and renders ``search.html`` with no results instead of
    returning a 400. Any non-empty ``q`` is handed to
    ``search_all_memories`` with ``k=DEFAULT_SEARCH_K``; per the T93
    notes the service itself maps whitespace-only queries to ``[]``, so
    no extra FTS5 ``MATCH ''`` guard is applied here.

    Hydration is batched (T106): distinct owner, chat and scene ids are
    gathered first, each entity kind is loaded through one ``IN (...)``
    helper call, and rows are then decorated from the resulting maps.
    The per-row ``get_bot`` / ``get_chat`` / ``get_scene`` helpers are
    not called on this path; they stay imported at module level for
    test-time monkeypatching.

    Args:
        request: Incoming request, forwarded to the template response.
        q: Raw search text from the query string; may be empty.
        conn: Database connection supplied by the ``get_conn`` dependency.

    Returns:
        The rendered ``search.html`` response with ``query``,
        ``results`` and ``active_nav`` in its context.
    """
    hits = search_all_memories(conn, query=q, k=DEFAULT_SEARCH_K) if q else []

    # Gather the distinct, non-empty ids for each entity kind so every
    # IN-list is deduplicated (the same bot or scene can recur across
    # many hits).
    owner_ids: set[str] = set(
        filter(None, (hit["owner_id"] for hit in hits))
    )
    chat_ids: set[str] = set(
        filter(None, (hit["chat_id"] for hit in hits))
    )
    scene_ids: set[int] = set(
        filter(None, (hit["scene_id"] for hit in hits))
    )

    bot_lookup = _fetch_bots_by_ids(conn, owner_ids)
    chat_lookup = _fetch_chats_by_ids(conn, chat_ids)
    scene_lookup = _fetch_scenes_by_ids(conn, scene_ids)

    # Display-field hydration lives in the route rather than the
    # service, so the service remains a plain FTS shim that other UIs
    # can reuse.
    results = []
    for hit in hits:
        owner_id = hit["owner_id"]
        bot = bot_lookup.get(owner_id)
        chat_id = hit["chat_id"]
        chat = chat_lookup.get(chat_id)
        scene_id = hit["scene_id"]
        scene = scene_lookup.get(scene_id) if scene_id else None

        chat_name = None
        if chat:
            chat_name = chat.get("narrative_anchor")

        # There is no ``title`` column on scenes today, so the
        # ``started_at`` timestamp doubles as the human-readable label;
        # it stays ``None`` when the hit has no scene.
        scene_label = None
        if scene:
            scene_label = scene.get("started_at")

        entry = {
            "memory_id": hit["memory_id"],
            "owner_id": owner_id,
            # Fall back to the raw id when the bot row is missing.
            "owner_name": bot["name"] if bot else owner_id,
            "chat_id": chat_id,
            "chat_name": chat_name,
            "scene_id": scene_id,
            # T111.2: ``event_id`` lets the template deep-link to the
            # originating turn through the ``id="turn-{event_id}"``
            # anchor stamped on each turn DOM node (Phase 3.5 T86). It
            # can be ``None`` for memory rows projected before the 0014
            # migration (T109 did not backfill them); the template then
            # links to the chat as a whole.
            "event_id": hit["event_id"],
            "scene_label": scene_label,
            "pov_summary": hit["pov_summary"],
            # T111.1: ``snippet`` is the FTS5 windowed excerpt with
            # ``<mark>`` around each match. Every hit comes from a
            # MATCH query so it should be present, but fall back to the
            # full ``pov_summary`` so the template never gets ``None``.
            "snippet": hit.get("snippet") or hit["pov_summary"],
            "significance": hit["significance"],
            "ts": hit["ts"],
        }
        results.append(entry)

    context = {
        "query": q,
        "results": results,
        "active_nav": "search",
    }
    return TEMPLATES.TemplateResponse(request, "search.html", context)