feat: cross-chat search FTS snippet highlighting (T111.1)
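Add ``snippet(memories_fts, 0, '<mark>', '</mark>', '…', 32)`` to ``search_all_memories``'s SELECT, next to the existing ``pov_summary`` column, so each match in a result row is wrapped in ``<mark>`` for the search-results UI; the template now shows the snippet where it used to show ``pov_summary``. The original ``pov_summary`` is still returned alongside as a non-highlighted fallback. Template renders ``r.snippet|safe`` so the ``<mark>`` markers survive Jinja's auto-escape. Caveat: FTS5 ``snippet()`` copies the indexed column text verbatim and does not HTML-escape it, so bypassing auto-escape is safe only while ``pov_summary`` is guaranteed to contain no markup of its own; the same holds for the ``pov_summary`` fallback whenever it is rendered through ``|safe``.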
This commit is contained in:
@@ -26,13 +26,19 @@ def search_all_memories(
|
||||
"""Search FTS5 across all owners and chats.
|
||||
|
||||
Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
|
||||
pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25
|
||||
rank ascending (lower rank = stronger match, surfaced first).
|
||||
pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5
|
||||
BM25 rank ascending (lower rank = stronger match, surfaced first).
|
||||
|
||||
The ``memories`` table has no ``ts`` column; we expose ``created_at``
|
||||
(the projector-side row insertion timestamp) under that key so the
|
||||
UI does not have to know the storage name.
|
||||
|
||||
``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the
|
||||
matched ``pov_summary`` column: a windowed excerpt with each match
|
||||
token wrapped in ``<mark>...</mark>`` for the search-results UI to
|
||||
render verbatim. The full ``pov_summary`` is also returned so
|
||||
non-highlighted callers (or fallbacks) keep the original string.
|
||||
|
||||
An empty / whitespace-only ``query`` short-circuits to ``[]`` to
|
||||
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
|
||||
"no input yet" state from triggering a full-table scan.
|
||||
@@ -45,9 +51,20 @@ def search_all_memories(
|
||||
# from the content table because the FTS index only stores
|
||||
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
|
||||
# negative scores where lower is better.
|
||||
#
|
||||
# ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the
|
||||
# FTS virtual table, which is ``pov_summary`` (the only column
|
||||
# indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5(
|
||||
# pov_summary, ...)`` in migration 0006). SQLite passes the raw
|
||||
# column text through verbatim aside from inserting the configured
|
||||
# before/after match markers, so the only HTML in the output is the
|
||||
# ``<mark>`` we injected — provided ``pov_summary`` itself holds no markup, because FTS5 does not HTML-escape the text that ``|safe`` then renders as-is.
|
||||
rows = conn.execute(
|
||||
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
|
||||
" m.pov_summary, m.significance, m.created_at, "
|
||||
" m.pov_summary, "
|
||||
" snippet(memories_fts, 0, '<mark>', '</mark>', '…', 32) "
|
||||
" AS snippet, "
|
||||
" m.significance, m.created_at, "
|
||||
" memories_fts.rank "
|
||||
"FROM memories_fts "
|
||||
"JOIN memories m ON m.id = memories_fts.rowid "
|
||||
@@ -64,9 +81,10 @@ def search_all_memories(
|
||||
"chat_id": r[2],
|
||||
"scene_id": r[3],
|
||||
"pov_summary": r[4],
|
||||
"significance": r[5],
|
||||
"ts": r[6],
|
||||
"fts_rank": r[7],
|
||||
"snippet": r[5],
|
||||
"significance": r[6],
|
||||
"ts": r[7],
|
||||
"fts_rank": r[8],
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
|
||||
@@ -28,7 +28,15 @@
|
||||
{% if r.chat_name %}<span>· {{ r.chat_name }}</span>{% endif %}
|
||||
{% if r.scene_label %}<span>· scene {{ r.scene_label }}</span>{% endif %}
|
||||
</div>
|
||||
<div class="search-result-summary">{{ r.pov_summary }}</div>
|
||||
{# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with
|
||||
each match wrapped in ``<mark>...</mark>``. ``|safe`` is
|
||||
required so the marker tags survive Jinja's auto-escape; the
|
||||
snippet is built by SQLite from indexed text, so the only
|
||||
HTML in the string is the ``<mark>`` we configured (any
|
||||
special chars from the source content are passed through as
|
||||
raw, unescaped text, so this holds only while the source has no markup of its own). This is the only ``|safe`` filter
|
||||
on the page — chat_id, owner_name, etc. remain auto-escaped. #}
|
||||
<div class="search-result-summary">{{ r.snippet|safe }}</div>
|
||||
</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
|
||||
@@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
|
||||
scene.get("started_at") if scene else None
|
||||
),
|
||||
"pov_summary": row["pov_summary"],
|
||||
# T111.1: ``snippet`` is the FTS5 windowed excerpt with
|
||||
# ``<mark>`` tags around each match. Falls back to the
|
||||
# full ``pov_summary`` if the row lacks a snippet (which
|
||||
# shouldn't happen on this code path because every
|
||||
# ``raw_results`` row came from a MATCH query, but we
|
||||
# guard defensively so the template never renders
|
||||
# ``None``).
|
||||
"snippet": row.get("snippet") or row["pov_summary"],
|
||||
"significance": row["significance"],
|
||||
"ts": row["ts"],
|
||||
}
|
||||
|
||||
@@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path):
|
||||
assert 'href="/chats/chat_a"' in resp.text
|
||||
|
||||
|
||||
def test_search_results_include_fts_snippet_with_highlight(client, tmp_path):
    """T111.1: the matched term is rendered inside ``<mark>...</mark>``.

    Per the original test notes, the seeded ``pov_summary`` reads
    ``the rabbit darted across chat_a``, so a query for ``rabbit`` should
    produce a snippet that contains ``<mark>rabbit</mark>``.

    Only the presence of the marker is checked: the snippet may be cut
    short with an ellipsis when the indexed text is longer than the
    configured token window, so the full excerpt is not compared.
    """
    db_path = tmp_path / "test.db"
    _seed_two_chats_with_memories(db_path)

    response = client.get("/search?q=rabbit")

    assert response.status_code == 200
    highlighted_term = "<mark>rabbit</mark>"
    assert highlighted_term in response.text
|
||||
|
||||
|
||||
def test_search_results_use_batched_lookups(client, tmp_path):
|
||||
"""T106: hydration must not fan out to per-row ``get_bot``/
|
||||
``get_chat``/``get_scene`` calls.
|
||||
|
||||
Reference in New Issue
Block a user