feat: cross-chat search FTS snippet highlighting (T111.1)

Replace the ``pov_summary`` column in ``search_all_memories``'s SELECT
with ``snippet(memories_fts, 0, '<mark>', '</mark>', '…', 32)`` so each
match in a result row is wrapped in ``<mark>`` for the search-results
UI. The original ``pov_summary`` is still returned alongside as a
non-highlighted fallback. Template renders ``r.snippet|safe`` — the only
HTML in the snippet output is the configured ``<mark>`` markers, so it
is safe to bypass Jinja's auto-escape.
This commit is contained in:
Joseph Doherty
2026-04-27 05:30:32 -04:00
parent fae6edef6b
commit fa87ab8c55
4 changed files with 57 additions and 7 deletions
+24 -6
View File
@@ -26,13 +26,19 @@ def search_all_memories(
"""Search FTS5 across all owners and chats.
Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25
rank ascending (lower rank = stronger match, surfaced first).
pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5
BM25 rank ascending (lower rank = stronger match, surfaced first).
The ``memories`` table has no ``ts`` column; we expose ``created_at``
(the projector-side row insertion timestamp) under that key so the
UI does not have to know the storage name.
``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the
matched ``pov_summary`` column: a windowed excerpt with each match
token wrapped in ``<mark>...</mark>`` for the search-results UI to
render verbatim. The full ``pov_summary`` is also returned so
non-highlighted callers (or fallbacks) keep the original string.
An empty / whitespace-only ``query`` short-circuits to ``[]`` to
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
"no input yet" state from triggering a full-table scan.
@@ -45,9 +51,20 @@ def search_all_memories(
# from the content table because the FTS index only stores
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
# negative scores where lower is better.
#
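# ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the
# FTS virtual table, which is ``pov_summary`` (the only column
# indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5(
# pov_summary, ...)`` in migration 0006). SQLite passes the raw
# column text through verbatim aside from inserting the configured
# before/after match markers; it does NOT HTML-escape that text.
# NOTE(review): rendering the result with ``|safe`` is therefore only
# safe if ``pov_summary`` can never contain markup. Otherwise any
# ``<`` / ``&`` in the stored summary reaches the browser as live
# HTML alongside the ``<mark>`` we injected. Confirm, or escape the
# text before the match markers are turned into tags.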
rows = conn.execute(
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
" m.pov_summary, m.significance, m.created_at, "
" m.pov_summary, "
" snippet(memories_fts, 0, '<mark>', '</mark>', '', 32) "
" AS snippet, "
" m.significance, m.created_at, "
" memories_fts.rank "
"FROM memories_fts "
"JOIN memories m ON m.id = memories_fts.rowid "
@@ -64,9 +81,10 @@ def search_all_memories(
"chat_id": r[2],
"scene_id": r[3],
"pov_summary": r[4],
"significance": r[5],
"ts": r[6],
"fts_rank": r[7],
"snippet": r[5],
"significance": r[6],
"ts": r[7],
"fts_rank": r[8],
}
for r in rows
]
+9 -1
View File
@@ -28,7 +28,15 @@
{% if r.chat_name %}<span>&middot; {{ r.chat_name }}</span>{% endif %}
{% if r.scene_label %}<span>&middot; scene {{ r.scene_label }}</span>{% endif %}
</div>
<div class="search-result-summary">{{ r.pov_summary }}</div>
{# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with
each match wrapped in ``<mark>...</mark>``. ``|safe`` is
required so the marker tags survive Jinja's auto-escape.
NOTE(review): SQLite builds the snippet from the indexed text
without HTML-escaping it, so ``|safe`` also lets any special
chars from the source content through as live HTML, not as
literal text. This is only safe if ``pov_summary`` is
guaranteed to be markup-free; confirm, or escape the text
before highlighting. This is the only ``|safe`` filter on the
page; chat_id, owner_name, etc. remain auto-escaped. #}
<div class="search-result-summary">{{ r.snippet|safe }}</div>
</a>
</li>
{% endfor %}
+8
View File
@@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
scene.get("started_at") if scene else None
),
"pov_summary": row["pov_summary"],
# T111.1: ``snippet`` is the FTS5 windowed excerpt with
# ``<mark>`` tags around each match. Falls back to the
# full ``pov_summary`` if the row lacks a snippet (which
# shouldn't happen on this code path because every
# ``raw_results`` row came from a MATCH query, but we
# guard defensively so the template never renders
# ``None``).
"snippet": row.get("snippet") or row["pov_summary"],
"significance": row["significance"],
"ts": row["ts"],
}
+16
View File
@@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path):
assert 'href="/chats/chat_a"' in resp.text
def test_search_results_include_fts_snippet_with_highlight(client, tmp_path):
    """T111.1: search results highlight the matched term via FTS ``snippet()``.

    The seeded ``pov_summary`` is ``the rabbit darted across chat_a``, so a
    query for ``rabbit`` should yield a snippet containing
    ``<mark>rabbit</mark>``. Only the presence of the marker is asserted,
    because the snippet may be truncated when the indexed text runs longer
    than the configured token window.
    """
    db_path = tmp_path / "test.db"
    _seed_two_chats_with_memories(db_path)

    response = client.get("/search?q=rabbit")

    assert response.status_code == 200
    assert "<mark>rabbit</mark>" in response.text
def test_search_results_use_batched_lookups(client, tmp_path):
"""T106: hydration must not fan out to per-row ``get_bot``/
``get_chat``/``get_scene`` calls.