From fa87ab8c552acf722a98e864f0dfbc0e8c0bcdcc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:30:32 -0400 Subject: [PATCH] feat: cross-chat search FTS snippet highlighting (T111.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the ``pov_summary`` column in ``search_all_memories``'s SELECT with ``snippet(memories_fts, 0, '', '', '…', 32)`` so each match in a result row is wrapped in ```` for the search-results UI. The original ``pov_summary`` is still returned alongside as a non-highlighted fallback. Template renders ``r.snippet|safe`` — the only HTML in the snippet output is the configured ```` markers, so it is safe to bypass Jinja's auto-escape. --- chat/services/cross_chat_search.py | 30 ++++++++++++++++++++++++------ chat/templates/search.html | 10 +++++++++- chat/web/search.py | 8 ++++++++ tests/test_search_ux.py | 16 ++++++++++++++++ 4 files changed, 57 insertions(+), 7 deletions(-) diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py index cb0403f..2e10f71 100644 --- a/chat/services/cross_chat_search.py +++ b/chat/services/cross_chat_search.py @@ -26,13 +26,19 @@ def search_all_memories( """Search FTS5 across all owners and chats. Returns rows with ``{memory_id, owner_id, chat_id, scene_id, - pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25 - rank ascending (lower rank = stronger match, surfaced first). + pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5 + BM25 rank ascending (lower rank = stronger match, surfaced first). The ``memories`` table has no ``ts`` column; we expose ``created_at`` (the projector-side row insertion timestamp) under that key so the UI does not have to know the storage name. + ``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the + matched ``pov_summary`` column: a windowed excerpt with each match + token wrapped in ``...`` for the search-results UI to + render verbatim. 
The full ``pov_summary`` is also returned so + non-highlighted callers (or fallbacks) keep the original string. + An empty / whitespace-only ``query`` short-circuits to ``[]`` to avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar "no input yet" state from triggering a full-table scan. @@ -45,9 +51,20 @@ def search_all_memories( # from the content table because the FTS index only stores # ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns # negative scores where lower is better. + # + # ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the + # FTS virtual table, which is ``pov_summary`` (the only column + # indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5( + # pov_summary, ...)`` in migration 0006). SQLite passes the raw + # column text through verbatim aside from inserting the configured + # before/after match markers, so the only HTML in the output is the + # ```` we injected — safe to render with ``|safe`` server-side. rows = conn.execute( "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " - " m.pov_summary, m.significance, m.created_at, " + " m.pov_summary, " + " snippet(memories_fts, 0, '', '', '…', 32) " + " AS snippet, " + " m.significance, m.created_at, " " memories_fts.rank " "FROM memories_fts " "JOIN memories m ON m.id = memories_fts.rowid " @@ -64,9 +81,10 @@ def search_all_memories( "chat_id": r[2], "scene_id": r[3], "pov_summary": r[4], - "significance": r[5], - "ts": r[6], - "fts_rank": r[7], + "snippet": r[5], + "significance": r[6], + "ts": r[7], + "fts_rank": r[8], } for r in rows ] diff --git a/chat/templates/search.html b/chat/templates/search.html index ee61c24..527ee86 100644 --- a/chat/templates/search.html +++ b/chat/templates/search.html @@ -28,7 +28,15 @@ {% if r.chat_name %}· {{ r.chat_name }}{% endif %} {% if r.scene_label %}· scene {{ r.scene_label }}{% endif %} -
{{ r.pov_summary }}
+ {# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with each match wrapped in ``<mark>...</mark>``. ``|safe`` is required so the marker tags survive Jinja's auto-escape; the snippet is built by SQLite from indexed text, so the only HTML in the string is the ``<mark>`` we configured (any special chars from the source content are passed through as literal text, NOT as HTML). This is the only ``|safe`` filter on the page — chat_id, owner_name, etc. remain auto-escaped. NOTE(review): FTS5 ``snippet()`` does not HTML-escape the column text it returns, so the claim above holds only if ``pov_summary`` can never contain markup — confirm, or escape the snippet server-side before marking it safe. #} +
{{ r.snippet|safe }}
{% endfor %} diff --git a/chat/web/search.py b/chat/web/search.py index 458c7c7..cf1974a 100644 --- a/chat/web/search.py +++ b/chat/web/search.py @@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)): scene.get("started_at") if scene else None ), "pov_summary": row["pov_summary"], + # T111.1: ``snippet`` is the FTS5 windowed excerpt with + # ```` tags around each match. Falls back to the + # full ``pov_summary`` if the row lacks a snippet (which + # shouldn't happen on this code path because every + # ``raw_results`` row came from a MATCH query, but we + # guard defensively so the template never renders + # ``None``). + "snippet": row.get("snippet") or row["pov_summary"], "significance": row["significance"], "ts": row["ts"], } diff --git a/tests/test_search_ux.py b/tests/test_search_ux.py index 013337b..5afbbb4 100644 --- a/tests/test_search_ux.py +++ b/tests/test_search_ux.py @@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path): assert 'href="/chats/chat_a"' in resp.text +def test_search_results_include_fts_snippet_with_highlight(client, tmp_path): + """T111.1: FTS snippet() wraps each match in ``...`` so + the result row visually highlights the term that matched. + + The seeded ``pov_summary`` is ``the rabbit darted across chat_a``; + SQLite's ``snippet()`` returns the column text with each match token + wrapped — searching for ``rabbit`` yields a snippet containing + ``rabbit``. Assertion is just that the marker appears + (the snippet may be truncated with an ellipsis when the indexed text + runs longer than the configured token window).""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + assert "rabbit" in resp.text + + def test_search_results_use_batched_lookups(client, tmp_path): """T106: hydration must not fan out to per-row ``get_bot``/ ``get_chat``/``get_scene`` calls.