feat: cross-chat search FTS snippet highlighting (T111.1)
Replace the ``pov_summary`` column in ``search_all_memories``'s SELECT with ``snippet(memories_fts, 0, '<mark>', '</mark>', '…', 32)`` so each match in a result row is wrapped in ``<mark>`` for the search-results UI. The original ``pov_summary`` is still returned alongside as a non-highlighted fallback. Template renders ``r.snippet|safe`` — the only HTML in the snippet output is the configured ``<mark>`` markers, so it is safe to bypass Jinja's auto-escape.
This commit is contained in:
@@ -26,13 +26,19 @@ def search_all_memories(
|
|||||||
"""Search FTS5 across all owners and chats.
|
"""Search FTS5 across all owners and chats.
|
||||||
|
|
||||||
Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
|
Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
|
||||||
pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25
|
pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5
|
||||||
rank ascending (lower rank = stronger match, surfaced first).
|
BM25 rank ascending (lower rank = stronger match, surfaced first).
|
||||||
|
|
||||||
The ``memories`` table has no ``ts`` column; we expose ``created_at``
|
The ``memories`` table has no ``ts`` column; we expose ``created_at``
|
||||||
(the projector-side row insertion timestamp) under that key so the
|
(the projector-side row insertion timestamp) under that key so the
|
||||||
UI does not have to know the storage name.
|
UI does not have to know the storage name.
|
||||||
|
|
||||||
|
``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the
|
||||||
|
matched ``pov_summary`` column: a windowed excerpt with each match
|
||||||
|
token wrapped in ``<mark>...</mark>`` for the search-results UI to
|
||||||
|
render verbatim. The full ``pov_summary`` is also returned so
|
||||||
|
non-highlighted callers (or fallbacks) keep the original string.
|
||||||
|
|
||||||
An empty / whitespace-only ``query`` short-circuits to ``[]`` to
|
An empty / whitespace-only ``query`` short-circuits to ``[]`` to
|
||||||
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
|
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
|
||||||
"no input yet" state from triggering a full-table scan.
|
"no input yet" state from triggering a full-table scan.
|
||||||
@@ -45,9 +51,20 @@ def search_all_memories(
|
|||||||
# from the content table because the FTS index only stores
|
# from the content table because the FTS index only stores
|
||||||
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
|
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
|
||||||
# negative scores where lower is better.
|
# negative scores where lower is better.
|
||||||
|
#
|
||||||
|
# ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the
|
||||||
|
# FTS virtual table, which is ``pov_summary`` (the only column
|
||||||
|
# indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5(
|
||||||
|
# pov_summary, ...)`` in migration 0006). SQLite passes the raw
|
||||||
|
# column text through verbatim aside from inserting the configured
|
||||||
|
# before/after match markers, so the only HTML in the output is the
|
||||||
|
# ``<mark>`` we injected — safe to render with ``|safe`` server-side.
|
||||||
rows = conn.execute(
|
rows = conn.execute(
|
||||||
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
|
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
|
||||||
" m.pov_summary, m.significance, m.created_at, "
|
" m.pov_summary, "
|
||||||
|
" snippet(memories_fts, 0, '<mark>', '</mark>', '…', 32) "
|
||||||
|
" AS snippet, "
|
||||||
|
" m.significance, m.created_at, "
|
||||||
" memories_fts.rank "
|
" memories_fts.rank "
|
||||||
"FROM memories_fts "
|
"FROM memories_fts "
|
||||||
"JOIN memories m ON m.id = memories_fts.rowid "
|
"JOIN memories m ON m.id = memories_fts.rowid "
|
||||||
@@ -64,9 +81,10 @@ def search_all_memories(
|
|||||||
"chat_id": r[2],
|
"chat_id": r[2],
|
||||||
"scene_id": r[3],
|
"scene_id": r[3],
|
||||||
"pov_summary": r[4],
|
"pov_summary": r[4],
|
||||||
"significance": r[5],
|
"snippet": r[5],
|
||||||
"ts": r[6],
|
"significance": r[6],
|
||||||
"fts_rank": r[7],
|
"ts": r[7],
|
||||||
|
"fts_rank": r[8],
|
||||||
}
|
}
|
||||||
for r in rows
|
for r in rows
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -28,7 +28,15 @@
|
|||||||
{% if r.chat_name %}<span>· {{ r.chat_name }}</span>{% endif %}
|
{% if r.chat_name %}<span>· {{ r.chat_name }}</span>{% endif %}
|
||||||
{% if r.scene_label %}<span>· scene {{ r.scene_label }}</span>{% endif %}
|
{% if r.scene_label %}<span>· scene {{ r.scene_label }}</span>{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="search-result-summary">{{ r.pov_summary }}</div>
|
{# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with
|
||||||
|
each match wrapped in ``<mark>...</mark>``. ``|safe`` is
|
||||||
|
required so the marker tags survive Jinja's auto-escape; the
|
||||||
|
snippet is built by SQLite from indexed text, so the only
|
||||||
|
HTML in the string is the ``<mark>`` we configured (any
|
||||||
|
special chars from the source content are passed through as
|
||||||
|
literal text, NOT as HTML). This is the only ``|safe`` filter
|
||||||
|
on the page — chat_id, owner_name, etc. remain auto-escaped. #}
|
||||||
|
<div class="search-result-summary">{{ r.snippet|safe }}</div>
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|||||||
@@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
|
|||||||
scene.get("started_at") if scene else None
|
scene.get("started_at") if scene else None
|
||||||
),
|
),
|
||||||
"pov_summary": row["pov_summary"],
|
"pov_summary": row["pov_summary"],
|
||||||
|
# T111.1: ``snippet`` is the FTS5 windowed excerpt with
|
||||||
|
# ``<mark>`` tags around each match. Falls back to the
|
||||||
|
# full ``pov_summary`` if the row lacks a snippet (which
|
||||||
|
# shouldn't happen on this code path because every
|
||||||
|
# ``raw_results`` row came from a MATCH query, but we
|
||||||
|
# guard defensively so the template never renders
|
||||||
|
# ``None``).
|
||||||
|
"snippet": row.get("snippet") or row["pov_summary"],
|
||||||
"significance": row["significance"],
|
"significance": row["significance"],
|
||||||
"ts": row["ts"],
|
"ts": row["ts"],
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path):
|
|||||||
assert 'href="/chats/chat_a"' in resp.text
|
assert 'href="/chats/chat_a"' in resp.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_results_include_fts_snippet_with_highlight(client, tmp_path):
|
||||||
|
"""T111.1: FTS snippet() wraps each match in ``<mark>...</mark>`` so
|
||||||
|
the result row visually highlights the term that matched.
|
||||||
|
|
||||||
|
The seeded ``pov_summary`` is ``the rabbit darted across chat_a``;
|
||||||
|
SQLite's ``snippet()`` returns the column text with each match token
|
||||||
|
wrapped — searching for ``rabbit`` yields a snippet containing
|
||||||
|
``<mark>rabbit</mark>``. Assertion is just that the marker appears
|
||||||
|
(the snippet may be truncated with an ellipsis when the indexed text
|
||||||
|
runs longer than the configured token window)."""
|
||||||
|
_seed_two_chats_with_memories(tmp_path / "test.db")
|
||||||
|
resp = client.get("/search?q=rabbit")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert "<mark>rabbit</mark>" in resp.text
|
||||||
|
|
||||||
|
|
||||||
def test_search_results_use_batched_lookups(client, tmp_path):
|
def test_search_results_use_batched_lookups(client, tmp_path):
|
||||||
"""T106: hydration must not fan out to per-row ``get_bot``/
|
"""T106: hydration must not fan out to per-row ``get_bot``/
|
||||||
``get_chat``/``get_scene`` calls.
|
``get_chat``/``get_scene`` calls.
|
||||||
|
|||||||
Reference in New Issue
Block a user