From fa87ab8c552acf722a98e864f0dfbc0e8c0bcdcc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:30:32 -0400 Subject: [PATCH 1/2] feat: cross-chat search FTS snippet highlighting (T111.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the ``pov_summary`` column in ``search_all_memories``'s SELECT with ``snippet(memories_fts, 0, '', '', '…', 32)`` so each match in a result row is wrapped in ```` for the search-results UI. The original ``pov_summary`` is still returned alongside as a non-highlighted fallback. Template renders ``r.snippet|safe`` — the only HTML in the snippet output is the configured ```` markers, so it is safe to bypass Jinja's auto-escape. --- chat/services/cross_chat_search.py | 30 ++++++++++++++++++++++++------ chat/templates/search.html | 10 +++++++++- chat/web/search.py | 8 ++++++++ tests/test_search_ux.py | 16 ++++++++++++++++ 4 files changed, 57 insertions(+), 7 deletions(-) diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py index cb0403f..2e10f71 100644 --- a/chat/services/cross_chat_search.py +++ b/chat/services/cross_chat_search.py @@ -26,13 +26,19 @@ def search_all_memories( """Search FTS5 across all owners and chats. Returns rows with ``{memory_id, owner_id, chat_id, scene_id, - pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25 - rank ascending (lower rank = stronger match, surfaced first). + pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5 + BM25 rank ascending (lower rank = stronger match, surfaced first). The ``memories`` table has no ``ts`` column; we expose ``created_at`` (the projector-side row insertion timestamp) under that key so the UI does not have to know the storage name. + ``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the + matched ``pov_summary`` column: a windowed excerpt with each match + token wrapped in ``...`` for the search-results UI to + render verbatim. 
The full ``pov_summary`` is also returned so + non-highlighted callers (or fallbacks) keep the original string. + An empty / whitespace-only ``query`` short-circuits to ``[]`` to avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar "no input yet" state from triggering a full-table scan. @@ -45,9 +51,20 @@ def search_all_memories( # from the content table because the FTS index only stores # ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns # negative scores where lower is better. + # + # ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the + # FTS virtual table, which is ``pov_summary`` (the only column + # indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5( + # pov_summary, ...)`` in migration 0006). SQLite passes the raw + # column text through verbatim aside from inserting the configured + # before/after match markers, so the only HTML in the output is the + # ```` we injected — safe to render with ``|safe`` server-side. rows = conn.execute( "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " - " m.pov_summary, m.significance, m.created_at, " + " m.pov_summary, " + " snippet(memories_fts, 0, '', '', '…', 32) " + " AS snippet, " + " m.significance, m.created_at, " " memories_fts.rank " "FROM memories_fts " "JOIN memories m ON m.id = memories_fts.rowid " @@ -64,9 +81,10 @@ def search_all_memories( "chat_id": r[2], "scene_id": r[3], "pov_summary": r[4], - "significance": r[5], - "ts": r[6], - "fts_rank": r[7], + "snippet": r[5], + "significance": r[6], + "ts": r[7], + "fts_rank": r[8], } for r in rows ] diff --git a/chat/templates/search.html b/chat/templates/search.html index ee61c24..527ee86 100644 --- a/chat/templates/search.html +++ b/chat/templates/search.html @@ -28,7 +28,15 @@ {% if r.chat_name %}· {{ r.chat_name }}{% endif %} {% if r.scene_label %}· scene {{ r.scene_label }}{% endif %} -
{{ r.pov_summary }}
+ {# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with + each match wrapped in ``<mark>...</mark>``. ``|safe`` is + required so the marker tags survive Jinja's auto-escape; the + snippet is built by SQLite from indexed text, so the only + HTML the query itself adds is the ``<mark>`` pair we configured (any + special chars from the source content are passed through + verbatim and unescaped; NOTE(review): confirm ``pov_summary`` can + never contain markup, since ``|safe`` would render it as HTML). + This is the only ``|safe`` filter + on the page — chat_id, owner_name, etc. remain auto-escaped. #} +
{{ r.snippet|safe }}
{% endfor %} diff --git a/chat/web/search.py b/chat/web/search.py index 458c7c7..cf1974a 100644 --- a/chat/web/search.py +++ b/chat/web/search.py @@ -200,6 +200,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)): scene.get("started_at") if scene else None ), "pov_summary": row["pov_summary"], + # T111.1: ``snippet`` is the FTS5 windowed excerpt with + # ```` tags around each match. Falls back to the + # full ``pov_summary`` if the row lacks a snippet (which + # shouldn't happen on this code path because every + # ``raw_results`` row came from a MATCH query, but we + # guard defensively so the template never renders + # ``None``). + "snippet": row.get("snippet") or row["pov_summary"], "significance": row["significance"], "ts": row["ts"], } diff --git a/tests/test_search_ux.py b/tests/test_search_ux.py index 013337b..5afbbb4 100644 --- a/tests/test_search_ux.py +++ b/tests/test_search_ux.py @@ -136,6 +136,22 @@ def test_result_links_navigate_to_chat(client, tmp_path): assert 'href="/chats/chat_a"' in resp.text +def test_search_results_include_fts_snippet_with_highlight(client, tmp_path): + """T111.1: FTS snippet() wraps each match in ``...`` so + the result row visually highlights the term that matched. + + The seeded ``pov_summary`` is ``the rabbit darted across chat_a``; + SQLite's ``snippet()`` returns the column text with each match token + wrapped — searching for ``rabbit`` yields a snippet containing + ``rabbit``. Assertion is just that the marker appears + (the snippet may be truncated with an ellipsis when the indexed text + runs longer than the configured token window).""" + _seed_two_chats_with_memories(tmp_path / "test.db") + resp = client.get("/search?q=rabbit") + assert resp.status_code == 200 + assert "rabbit" in resp.text + + def test_search_results_use_batched_lookups(client, tmp_path): """T106: hydration must not fan out to per-row ``get_bot``/ ``get_chat``/``get_scene`` calls. 
From 9987da2c0747a8a7761a6c38353e1659361d85d6 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:42:17 -0400 Subject: [PATCH 2/2] feat: cross-chat search deep-links to turn via memories.event_id (T111.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ``m.event_id`` (T109's nullable column from migration 0014) to ``search_all_memories``'s SELECT, propagate it through the route's template context, and have ``search.html`` build result links as ``/chats/{chat_id}#turn-{event_id}`` — matching the ``id="turn-{event_id}"`` anchor that Phase 3.5 T86 stamps on each turn DOM node so the chat page scrolls to the originating turn on load. Memory rows projected before the 0014 migration ran read NULL ``event_id``; the template falls back to a chat-level link in that case so we never emit ``#turn-None``. Pre-existing tests that asserted on the bare ``href="/chats/{chat_id}"`` contract are updated to assert on the ``href="/chats/{chat_id}#turn-`` prefix to reflect the new deep-link. --- chat/services/cross_chat_search.py | 26 ++++++++++++++++++-------- chat/templates/search.html | 9 ++++++++- chat/web/search.py | 7 +++++++ tests/test_phase4_integration.py | 14 ++++++++------ tests/test_search_ux.py | 30 ++++++++++++++++++++++++++---- 5 files changed, 67 insertions(+), 19 deletions(-) diff --git a/chat/services/cross_chat_search.py b/chat/services/cross_chat_search.py index 2e10f71..d582610 100644 --- a/chat/services/cross_chat_search.py +++ b/chat/services/cross_chat_search.py @@ -26,8 +26,17 @@ def search_all_memories( """Search FTS5 across all owners and chats. Returns rows with ``{memory_id, owner_id, chat_id, scene_id, - pov_summary, snippet, significance, ts, fts_rank}``, sorted by FTS5 - BM25 rank ascending (lower rank = stronger match, surfaced first). + event_id, pov_summary, snippet, significance, ts, fts_rank}``, + sorted by FTS5 BM25 rank ascending (lower rank = stronger match, + surfaced first). 
+ + ``event_id`` (T111.2 / T109) is the id of the ``event_log`` row that + drove the projecting ``memory_written`` event. May be ``None`` for + memory rows projected before the 0014 schema migration ran (the + column is nullable on purpose; T109 did not backfill historical + rows). The search-results UI uses it to deep-link to the originating + turn anchor (Phase 3.5 T86 stamps ``id="turn-{event_id}"`` on each + turn DOM node) and falls back to a chat-level link when ``None``. The ``memories`` table has no ``ts`` column; we expose ``created_at`` (the projector-side row insertion timestamp) under that key so the @@ -60,7 +69,7 @@ def search_all_memories( # before/after match markers, so the only HTML in the output is the # ```` we injected — safe to render with ``|safe`` server-side. rows = conn.execute( - "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " + "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, m.event_id, " " m.pov_summary, " " snippet(memories_fts, 0, '', '', '…', 32) " " AS snippet, " @@ -80,11 +89,12 @@ def search_all_memories( "owner_id": r[1], "chat_id": r[2], "scene_id": r[3], - "pov_summary": r[4], - "snippet": r[5], - "significance": r[6], - "ts": r[7], - "fts_rank": r[8], + "event_id": r[4], + "pov_summary": r[5], + "snippet": r[6], + "significance": r[7], + "ts": r[8], + "fts_rank": r[9], } for r in rows ] diff --git a/chat/templates/search.html b/chat/templates/search.html index 527ee86..ce0e8c7 100644 --- a/chat/templates/search.html +++ b/chat/templates/search.html @@ -21,7 +21,14 @@