Files
chat/chat/web/search.py
T
Joseph Doherty 9987da2c07 feat: cross-chat search deep-links to turn via memories.event_id (T111.2)
Add ``m.event_id`` (T109's nullable column from migration 0014) to
``search_all_memories``'s SELECT, propagate it through the route's
template context, and have ``search.html`` build result links as
``/chats/{chat_id}#turn-{event_id}`` — matching the ``id="turn-{event_id}"``
anchor that Phase 3.5 T86 stamps on each turn DOM node so the chat page
scrolls to the originating turn on load. Memory rows projected before
the 0014 migration ran read NULL ``event_id``; the template falls back
to a chat-level link in that case so we never emit ``#turn-None``.

Pre-existing tests that asserted on the bare ``href="/chats/{chat_id}"``
contract are updated to assert on the ``href="/chats/{chat_id}#turn-``
prefix to reflect the new deep-link.
2026-04-27 05:42:17 -04:00

232 lines
9.1 KiB
Python

"""T100 (Phase 4): cross-chat search UX route.
Wraps T93's :func:`chat.services.cross_chat_search.search_all_memories`
in a small read-only HTML surface so the top-bar search input has
somewhere to land. The route does no filtering of its own beyond the
empty-query fast-path that T93 already implements; ranking, owner
scope, and witness scope all live in the service layer.
For each match we hydrate just enough metadata to render a row:
* the owner bot's display name (so users see "BOTA" not "bot_a"),
* the originating ``chat_id`` plus the memory's ``event_id`` (together
the link target — ``/chats/{chat_id}#turn-{event_id}``; rows whose
``event_id`` is NULL fall back to a link to the chat as a whole),
* the originating scene title when one exists,
* and the ``pov_summary`` itself.
T106 (Phase 4.5): hydration is batched. Pre-T106 the route called
``get_bot``/``get_chat``/``get_scene`` once per result row — N+1 with
``DEFAULT_SEARCH_K=50`` meaning up to 150 individual SELECTs per page
load. We now collect distinct ids first and fan-in via three
``WHERE id IN (...)`` queries, then map back per row.
We deliberately keep this module synchronous and template-only — no
HTMX swaps, no JSON API — because the search box is a "leave the
current chat to look something up" surface, not an inline drawer.
"""
from __future__ import annotations
import json
from pathlib import Path
from sqlite3 import Connection
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from chat.services.cross_chat_search import search_all_memories
from chat.state.entities import get_bot
from chat.state.world import get_chat, get_scene
from chat.web.bots import get_conn
# Jinja2 environment rooted at the ``templates`` directory that sits two
# levels above this file (i.e. next to this module's parent package).
TEMPLATES = Jinja2Templates(
    directory=str(Path(__file__).resolve().parents[1] / "templates"),
)

#: Upper bound on cross-chat FTS matches rendered per ``/search`` page
#: load. Kept as a module-level constant (T106) so the cap can be tuned
#: without editing the route body. ``search_all_memories`` defaults to a
#: smaller ``k=20``; the route overrides it because the top-bar search
#: is a "scan everything I've seen" surface, not an inline drawer.
DEFAULT_SEARCH_K = 50

# Router that the ``/search`` endpoint below registers itself on.
router = APIRouter()
def _fetch_bots_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Load many ``bots`` rows in a single ``WHERE id IN (...)`` query.

    Batched sibling of :func:`chat.state.entities.get_bot`, kept local
    to this module (not exported from ``state.entities``) so T106's
    scope stays confined to ``search.py``.

    Args:
        conn: Open SQLite connection.
        ids: Bot ids to look up.

    Returns:
        ``{bot_id: bot_dict}`` for every id that matched a row. Each
        dict carries the row's columns, except that
        ``voice_samples_json`` and ``traits_json`` are replaced by their
        JSON-decoded values under ``voice_samples`` and ``traits``. Ids
        with no matching row are simply absent from the map. An empty
        ``ids`` yields ``{}`` without touching the database, because
        SQLite rejects ``WHERE id IN ()`` as a syntax error.
    """
    if not ids:
        return {}

    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM bots WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Column names come from the cursor's own description, so they
    # always line up with the fetched rows and no separate PRAGMA
    # statement is needed.
    columns = [col[0] for col in cursor.description]

    bots: dict[str, dict] = {}
    for row in cursor.fetchall():
        bot = dict(zip(columns, row))
        bot["voice_samples"] = json.loads(bot.pop("voice_samples_json"))
        bot["traits"] = json.loads(bot.pop("traits_json"))
        bots[bot["id"]] = bot
    return bots
def _fetch_chats_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Load many chats, joined with their state, in one ``IN (...)`` query.

    Batched sibling of :func:`chat.state.world.get_chat`; it is intended
    to return dicts of the same shape as that helper.

    Args:
        conn: Open SQLite connection.
        ids: Chat ids to look up.

    Returns:
        ``{chat_id: chat_dict}`` where each dict has the keys ``id``,
        ``host_bot_id``, ``guest_bot_id``, ``created_at``, ``time``,
        ``weather``, ``active_scene_id`` and ``narrative_anchor``.
        Because ``chats`` is inner-joined to ``chat_state``, a chat
        without a state row is absent from the result, as is any id
        with no ``chats`` row. An empty ``ids`` returns ``{}`` up front
        to dodge the ``IN ()`` syntax error.
    """
    if not ids:
        return {}

    # Output keys, listed in the same order as the SELECT's columns.
    keys = (
        "id",
        "host_bot_id",
        "guest_bot_id",
        "created_at",
        "time",
        "weather",
        "active_scene_id",
        "narrative_anchor",
    )
    marks = ",".join("?" * len(ids))
    sql = (
        "SELECT c.id, c.host_bot_id, c.guest_bot_id, c.created_at, "
        " s.time, s.weather, s.active_scene_id, s.narrative_anchor "
        "FROM chats c JOIN chat_state s ON s.chat_id = c.id "
        f"WHERE c.id IN ({marks})"
    )

    chats: dict[str, dict] = {}
    for record in conn.execute(sql, tuple(ids)).fetchall():
        chats[record[0]] = dict(zip(keys, record))
    return chats
def _fetch_scenes_by_ids(conn: Connection, ids: set[int]) -> dict[int, dict]:
    """Load many ``scenes`` rows in a single ``WHERE id IN (...)`` query.

    Batched sibling of :func:`chat.state.world.get_scene`.

    Args:
        conn: Open SQLite connection.
        ids: Scene ids to look up.

    Returns:
        ``{scene_id: scene_dict}`` for every id that matched a row. Each
        dict carries the row's columns, except that
        ``participants_json`` is replaced by its JSON-decoded value
        under ``participants``. Ids with no matching row are absent.
        An empty ``ids`` returns ``{}`` without querying, since
        ``IN ()`` is not valid SQLite syntax.
    """
    if not ids:
        return {}

    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM scenes WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Column names come from the cursor's own description, so they
    # always line up with the fetched rows and no separate PRAGMA
    # statement is needed.
    columns = [col[0] for col in cursor.description]

    scenes: dict[int, dict] = {}
    for row in cursor.fetchall():
        scene = dict(zip(columns, row))
        scene["participants"] = json.loads(scene.pop("participants_json"))
        scenes[scene["id"]] = scene
    return scenes
@router.get("/search", response_class=HTMLResponse)
async def search(request: Request, q: str = "", conn=Depends(get_conn)):
    """Render ``search.html`` with at most :data:`DEFAULT_SEARCH_K` matches.

    An empty ``q`` is allowed on purpose: the top-bar form submits to
    this URL even with an empty input, so that path skips the search
    service entirely and renders the page with no results rather than
    answering with a 400. According to T93's contract the service also
    short-circuits whitespace-only queries to ``[]``, so there is no
    FTS5 ``MATCH ''`` syntax error to guard against here.

    Hydration is batched (T106): instead of one ``get_bot`` /
    ``get_chat`` / ``get_scene`` call per row (worst case 3 * k
    SELECTs), the distinct ids are gathered first, one ``IN (...)``
    lookup is issued per entity kind, and each row is then decorated
    from the resulting maps. ``get_bot`` et al. stay imported for
    test-time monkeypatching but are not called on this path.

    Args:
        request: Incoming request, handed to the template response.
        q: Raw query string from the search box; may be empty.
        conn: Database connection provided by ``get_conn``.

    Returns:
        The rendered ``search.html`` template response, with ``query``,
        ``results`` and ``active_nav`` in its context.
    """
    if q:
        matches = search_all_memories(conn, query=q, k=DEFAULT_SEARCH_K)
    else:
        matches = []

    # Gather distinct, truthy ids in one pass; sets dedupe the IN-lists
    # (a popular bot or scene recurs many times across the result set).
    owner_ids: set[str] = set()
    chat_ids: set[str] = set()
    scene_ids: set[int] = set()
    for match in matches:
        if match["owner_id"]:
            owner_ids.add(match["owner_id"])
        if match["chat_id"]:
            chat_ids.add(match["chat_id"])
        if match["scene_id"]:
            scene_ids.add(match["scene_id"])

    bot_lookup = _fetch_bots_by_ids(conn, owner_ids)
    chat_lookup = _fetch_chats_by_ids(conn, chat_ids)
    scene_lookup = _fetch_scenes_by_ids(conn, scene_ids)

    # Display fields are attached here rather than in the service so
    # the service stays a pure FTS shim that other UIs can reuse.
    results = []
    for match in matches:
        owner_id = match["owner_id"]
        chat_id = match["chat_id"]
        scene_id = match["scene_id"]

        bot = bot_lookup.get(owner_id)
        chat = chat_lookup.get(chat_id)
        scene = scene_lookup.get(scene_id) if scene_id else None

        # Show the bot's display name, or the raw id when it is unknown.
        if bot:
            owner_name = bot["name"]
        else:
            owner_name = owner_id

        chat_name = None
        if chat:
            chat_name = chat.get("narrative_anchor")

        # Scenes have no ``title`` column today, so the ``started_at``
        # timestamp stands in as the label; blank when no scene is set.
        scene_label = None
        if scene:
            scene_label = scene.get("started_at")

        # T111.1: ``snippet`` is the FTS5 windowed excerpt with ``mark``
        # tags around each match. Fall back to the full ``pov_summary``
        # when a row lacks one so the template never renders ``None``.
        snippet = match.get("snippet") or match["pov_summary"]

        results.append(
            {
                "memory_id": match["memory_id"],
                "owner_id": owner_id,
                "owner_name": owner_name,
                "chat_id": chat_id,
                "chat_name": chat_name,
                "scene_id": scene_id,
                # T111.2: ``event_id`` deep-links to the originating
                # turn via the ``id="turn-{event_id}"`` anchor. It may
                # be ``None`` for memory rows projected before the 0014
                # migration ran; the template then falls back to a
                # chat-level link.
                "event_id": match["event_id"],
                "scene_label": scene_label,
                "pov_summary": match["pov_summary"],
                "snippet": snippet,
                "significance": match["significance"],
                "ts": match["ts"],
            }
        )

    context = {
        "query": q,
        "results": results,
        "active_nav": "search",
    }
    return TEMPLATES.TemplateResponse(request, "search.html", context)