9987da2c07
Add ``m.event_id`` (T109's nullable column from migration 0014) to
``search_all_memories``'s SELECT, propagate it through the route's
template context, and have ``search.html`` build result links as
``/chats/{chat_id}#turn-{event_id}`` — matching the ``id="turn-{event_id}"``
anchor that Phase 3.5 T86 stamps on each turn DOM node so the chat page
scrolls to the originating turn on load. Memory rows projected before
the 0014 migration ran read NULL ``event_id``; the template falls back
to a chat-level link in that case so we never emit ``#turn-None``.
Pre-existing tests that asserted on the bare ``href="/chats/{chat_id}"``
contract are updated to assert on the ``href="/chats/{chat_id}#turn-``
prefix to reflect the new deep-link.
232 lines
9.1 KiB
Python
232 lines
9.1 KiB
Python
"""T100 (Phase 4): cross-chat search UX route.
|
|
|
|
Wraps T93's :func:`chat.services.cross_chat_search.search_all_memories`
|
|
in a small read-only HTML surface so the top-bar search input has
|
|
somewhere to land. The route does no filtering of its own beyond the
|
|
empty-query fast-path that T93 already implements; ranking, owner
|
|
scope, and witness scope all live in the service layer.
|
|
|
|
For each match we hydrate just enough metadata to render a row:
|
|
* the owner bot's display name (so users see "BOTA" not "bot_a"),
|
|
* the originating ``chat_id`` plus the memory's ``event_id`` when it
  has one (the link target — the template deep-links to the chat's
  ``#turn-{event_id}`` anchor and falls back to a chat-level link for
  rows whose ``event_id`` is NULL),
|
|
* the originating scene's ``started_at`` timestamp as a label when a scene is set (scenes have no ``title`` column today),
|
|
* and the ``pov_summary`` itself.
|
|
|
|
T106 (Phase 4.5): hydration is batched. Pre-T106 the route called
|
|
``get_bot``/``get_chat``/``get_scene`` once per result row — N+1 with
|
|
``DEFAULT_SEARCH_K=50`` meaning up to 150 individual SELECTs per page
|
|
load. We now collect distinct ids first and fan-in via three
|
|
``WHERE id IN (...)`` queries, then map back per row.
|
|
|
|
We deliberately keep this module synchronous and template-only — no
|
|
HTMX swaps, no JSON API — because the search box is a "leave the
|
|
current chat to look something up" surface, not an inline drawer.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from sqlite3 import Connection
|
|
|
|
from fastapi import APIRouter, Depends, Request
|
|
from fastapi.responses import HTMLResponse
|
|
from fastapi.templating import Jinja2Templates
|
|
|
|
from chat.services.cross_chat_search import search_all_memories
|
|
from chat.state.entities import get_bot
|
|
from chat.state.world import get_chat, get_scene
|
|
from chat.web.bots import get_conn
|
|
|
|
# Jinja environment rooted at the ``templates`` directory that sits two
# levels above this file's own location.
TEMPLATES = Jinja2Templates(
    directory=str(Path(__file__).resolve().parents[1] / "templates")
)

#: Upper bound on the number of cross-chat FTS matches rendered for a
#: single ``/search`` page load. Kept at module level (T106) so it can
#: be tuned without editing the route body. ``search_all_memories``
#: defaults to a smaller ``k=20``; the route overrides it because the
#: top-bar search is a "scan everything I've seen" surface, not an
#: inline drawer.
DEFAULT_SEARCH_K = 50

router = APIRouter()
|
|
|
|
|
|
def _fetch_bots_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Batched sibling of :func:`chat.state.entities.get_bot`.

    Inlined here (not exported from ``state.entities``) to keep T106's
    scope confined to ``search.py`` per the Phase 4.5 plan.

    Args:
        conn: Open SQLite connection to read the ``bots`` table from.
        ids: Distinct bot ids to hydrate.

    Returns:
        ``{bot_id: bot_dict}`` for every id in ``ids`` that has a row.
        Each dict carries all ``bots`` columns, except that the
        ``voice_samples_json`` / ``traits_json`` columns are replaced by
        their JSON-decoded values under ``voice_samples`` / ``traits``.
        Ids with no matching row are simply absent from the map (the
        caller falls back to the raw id string).

    Empty ``ids`` short-circuits to ``{}`` because SQLite rejects
    ``WHERE id IN ()`` as a syntax error.
    """
    if not ids:
        return {}
    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM bots WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Take the column names from the SELECT's own cursor rather than a
    # separate ``PRAGMA table_info`` call: this keeps the helper at a
    # single statement per call and ties the names to exactly the
    # columns this result set returned.
    cols = [col[0] for col in cursor.description]
    out: dict[str, dict] = {}
    for row in cursor.fetchall():
        d = dict(zip(cols, row))
        d["voice_samples"] = json.loads(d.pop("voice_samples_json"))
        d["traits"] = json.loads(d.pop("traits_json"))
        out[d["id"]] = d
    return out
|
|
|
|
|
|
def _fetch_chats_by_ids(conn: Connection, ids: set[str]) -> dict[str, dict]:
    """Load several chats in one query, keyed by chat id.

    Batched counterpart of :func:`chat.state.world.get_chat`: it joins
    ``chats`` with ``chat_state`` and returns one dict per matching chat
    with the keys ``id``, ``host_bot_id``, ``guest_bot_id``,
    ``created_at``, ``time``, ``weather``, ``active_scene_id`` and
    ``narrative_anchor``. Ids without a joined row are absent from the
    result. An empty ``ids`` yields ``{}`` without touching the database,
    since ``IN ()`` is a SQLite syntax error.
    """
    if not ids:
        return {}

    # Output keys, in the same order as the SELECT's column list below.
    keys = (
        "id",
        "host_bot_id",
        "guest_bot_id",
        "created_at",
        "time",
        "weather",
        "active_scene_id",
        "narrative_anchor",
    )
    marks = ",".join("?" for _ in ids)
    sql = (
        "SELECT c.id, c.host_bot_id, c.guest_bot_id, c.created_at, "
        " s.time, s.weather, s.active_scene_id, s.narrative_anchor "
        "FROM chats c JOIN chat_state s ON s.chat_id = c.id "
        f"WHERE c.id IN ({marks})"
    )

    chats: dict[str, dict] = {}
    for record in conn.execute(sql, tuple(ids)).fetchall():
        chats[record[0]] = dict(zip(keys, record))
    return chats
|
|
|
|
|
|
def _fetch_scenes_by_ids(conn: Connection, ids: set[int]) -> dict[int, dict]:
    """Batched sibling of :func:`chat.state.world.get_scene`.

    Args:
        conn: Open SQLite connection to read the ``scenes`` table from.
        ids: Distinct scene ids to hydrate.

    Returns:
        ``{scene_id: scene_dict}`` for every id in ``ids`` that has a
        row. Each dict carries all ``scenes`` columns, except that
        ``participants_json`` is replaced by its JSON-decoded value under
        ``participants`` so callers see the same shape as the per-row
        helper. Ids with no matching row are absent from the map.

    Empty ``ids`` returns ``{}`` without querying, because ``IN ()`` is
    a SQLite syntax error.
    """
    if not ids:
        return {}
    placeholders = ",".join("?" * len(ids))
    cursor = conn.execute(
        f"SELECT * FROM scenes WHERE id IN ({placeholders})",
        tuple(ids),
    )
    # Column names are read from the SELECT's cursor instead of a
    # separate ``PRAGMA table_info`` call, so the helper issues exactly
    # one statement and the names always match the returned columns.
    cols = [col[0] for col in cursor.description]
    out: dict[int, dict] = {}
    for row in cursor.fetchall():
        d = dict(zip(cols, row))
        d["participants"] = json.loads(d.pop("participants_json"))
        out[d["id"]] = d
    return out
|
|
|
|
|
|
def _hydrate_search_row(
    row: dict,
    bots_by_id: dict,
    chats_by_id: dict,
    scenes_by_id: dict,
) -> dict:
    """Build the template-facing dict for one raw search match.

    Looks up the row's owner bot, chat, and scene in the pre-fetched
    maps and falls back gracefully when any of them is missing.
    """
    bot = bots_by_id.get(row["owner_id"])
    chat = chats_by_id.get(row["chat_id"])
    if row["scene_id"]:
        scene = scenes_by_id.get(row["scene_id"])
    else:
        scene = None

    # Unknown bots are shown by their raw id; unknown chats get no name.
    owner_name = bot["name"] if bot else row["owner_id"]
    chat_name = chat.get("narrative_anchor") if chat else None
    # Scenes have no ``title`` column today, so the ``started_at``
    # timestamp doubles as the human-friendly label when a scene is set.
    scene_label = scene.get("started_at") if scene else None

    return {
        "memory_id": row["memory_id"],
        "owner_id": row["owner_id"],
        "owner_name": owner_name,
        "chat_id": row["chat_id"],
        "chat_name": chat_name,
        "scene_id": row["scene_id"],
        # T111.2: ``event_id`` targets the ``id="turn-{event_id}"``
        # anchor stamped on each turn DOM node (Phase 3.5 T86). It may
        # be ``None`` for memory rows projected before the 0014
        # migration ran (T109 did not backfill); the template then
        # falls back to a chat-level link.
        "event_id": row["event_id"],
        "scene_label": scene_label,
        "pov_summary": row["pov_summary"],
        # T111.1: ``snippet`` is the FTS5 windowed excerpt with
        # ``<mark>`` tags around matches. Every row here came from a
        # MATCH query so it should be present, but fall back to the
        # full summary so the template never renders ``None``.
        "snippet": row.get("snippet") or row["pov_summary"],
        "significance": row["significance"],
        "ts": row["ts"],
    }


@router.get("/search", response_class=HTMLResponse)
async def search(request: Request, q: str = "", conn=Depends(get_conn)):
    """Render ``search.html`` with up to :data:`DEFAULT_SEARCH_K` matches.

    An empty ``q`` is deliberately accepted: the top-bar form submits to
    this URL even with a blank input, so that case renders the page's
    "enter a query" placeholder instead of a 400. T93's service already
    short-circuits whitespace-only queries to ``[]``, so no FTS5
    ``MATCH ''`` syntax error needs guarding against here.

    Hydration (T106) is batched. Instead of one ``get_bot`` /
    ``get_chat`` / ``get_scene`` call per row (worst case 3 * k
    individual SELECTs), the distinct ids are gathered first, each
    entity kind is fetched through its batched helper, and rows are
    mapped back from those dicts. ``get_bot`` et al. stay imported for
    test-time monkeypatching but are not invoked on the hot path.
    """
    if q:
        raw_results = search_all_memories(conn, query=q, k=DEFAULT_SEARCH_K)
    else:
        raw_results = []

    # Gather distinct, truthy ids in one pass so each IN-list is
    # deduplicated (a popular bot or scene recurs across many rows).
    bot_ids: set[str] = set()
    chat_ids: set[str] = set()
    scene_ids: set[int] = set()
    for hit in raw_results:
        if hit["owner_id"]:
            bot_ids.add(hit["owner_id"])
        if hit["chat_id"]:
            chat_ids.add(hit["chat_id"])
        if hit["scene_id"]:
            scene_ids.add(hit["scene_id"])

    bots_by_id = _fetch_bots_by_ids(conn, bot_ids)
    chats_by_id = _fetch_chats_by_ids(conn, chat_ids)
    scenes_by_id = _fetch_scenes_by_ids(conn, scene_ids)

    # Display fields are attached here rather than in the service so the
    # service stays a pure FTS shim that other UIs can reuse.
    results = [
        _hydrate_search_row(hit, bots_by_id, chats_by_id, scenes_by_id)
        for hit in raw_results
    ]

    context = {
        "query": q,
        "results": results,
        "active_nav": "search",
    }
    return TEMPLATES.TemplateResponse(request, "search.html", context)
|