merge: T111 search UX (FTS snippet + turn deep-link)

This commit is contained in:
Joseph Doherty
2026-04-27 05:42:48 -04:00
5 changed files with 118 additions and 20 deletions
+36 -8
View File
@@ -26,13 +26,28 @@ def search_all_memories(
"""Search FTS5 across all owners and chats. """Search FTS5 across all owners and chats.
Returns rows with ``{memory_id, owner_id, chat_id, scene_id, Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25 event_id, pov_summary, snippet, significance, ts, fts_rank}``,
rank ascending (lower rank = stronger match, surfaced first). sorted by FTS5 BM25 rank ascending (lower rank = stronger match,
surfaced first).
``event_id`` (T111.2 / T109) is the id of the ``event_log`` row that
drove the projecting ``memory_written`` event. May be ``None`` for
memory rows projected before the 0014 schema migration ran (the
column is nullable on purpose; T109 did not backfill historical
rows). The search-results UI uses it to deep-link to the originating
turn anchor (Phase 3.5 T86 stamps ``id="turn-{event_id}"`` on each
turn DOM node) and falls back to a chat-level link when ``None``.
The ``memories`` table has no ``ts`` column; we expose ``created_at`` The ``memories`` table has no ``ts`` column; we expose ``created_at``
(the projector-side row insertion timestamp) under that key so the (the projector-side row insertion timestamp) under that key so the
UI does not have to know the storage name. UI does not have to know the storage name.
``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the
matched ``pov_summary`` column: a windowed excerpt with each match
token wrapped in ``<mark>...</mark>`` for the search-results UI to
render verbatim. The full ``pov_summary`` is also returned so
non-highlighted callers (or fallbacks) keep the original string.
An empty / whitespace-only ``query`` short-circuits to ``[]`` to An empty / whitespace-only ``query`` short-circuits to ``[]`` to
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
"no input yet" state from triggering a full-table scan. "no input yet" state from triggering a full-table scan.
@@ -45,9 +60,20 @@ def search_all_memories(
# from the content table because the FTS index only stores # from the content table because the FTS index only stores
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns # ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
# negative scores where lower is better. # negative scores where lower is better.
#
# ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the
# FTS virtual table, which is ``pov_summary`` (the only column
# indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5(
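# pov_summary, ...)`` in migration 0006). SQLite passes the raw
# column text through verbatim aside from inserting the configured
# before/after match markers; it does NOT HTML-escape that text, so
# any markup already stored in ``pov_summary`` reaches the output
# unchanged alongside the ``<mark>`` we injected.
# NOTE(review): rendering this with ``|safe`` is therefore only safe
# if ``pov_summary`` is trusted or escaped before display — confirm.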
rows = conn.execute( rows = conn.execute(
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, " "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, m.event_id, "
" m.pov_summary, m.significance, m.created_at, " " m.pov_summary, "
" snippet(memories_fts, 0, '<mark>', '</mark>', '', 32) "
" AS snippet, "
" m.significance, m.created_at, "
" memories_fts.rank " " memories_fts.rank "
"FROM memories_fts " "FROM memories_fts "
"JOIN memories m ON m.id = memories_fts.rowid " "JOIN memories m ON m.id = memories_fts.rowid "
@@ -63,10 +89,12 @@ def search_all_memories(
"owner_id": r[1], "owner_id": r[1],
"chat_id": r[2], "chat_id": r[2],
"scene_id": r[3], "scene_id": r[3],
"pov_summary": r[4], "event_id": r[4],
"significance": r[5], "pov_summary": r[5],
"ts": r[6], "snippet": r[6],
"fts_rank": r[7], "significance": r[7],
"ts": r[8],
"fts_rank": r[9],
} }
for r in rows for r in rows
] ]
+17 -2
View File
@@ -21,14 +21,29 @@
<ul class="search-results"> <ul class="search-results">
{% for r in results %} {% for r in results %}
<li class="search-result"> <li class="search-result">
<a class="search-result-link" href="/chats/{{ r.chat_id }}"> {# T111.2: deep-link to the originating turn via the
``id="turn-{event_id}"`` anchor stamped by Phase 3.5 T86.
``event_id`` may be NULL for memory rows projected before the
0014 migration ran (T109 did not backfill historical rows); in
that case fall back to a chat-level link with no anchor so we
never emit ``#turn-None``. #}
<a class="search-result-link"
href="/chats/{{ r.chat_id }}{% if r.event_id %}#turn-{{ r.event_id }}{% endif %}">
<div class="search-result-meta muted"> <div class="search-result-meta muted">
<strong>{{ r.owner_name }}</strong> <strong>{{ r.owner_name }}</strong>
<span>&middot; {{ r.chat_id }}</span> <span>&middot; {{ r.chat_id }}</span>
{% if r.chat_name %}<span>&middot; {{ r.chat_name }}</span>{% endif %} {% if r.chat_name %}<span>&middot; {{ r.chat_name }}</span>{% endif %}
{% if r.scene_label %}<span>&middot; scene {{ r.scene_label }}</span>{% endif %} {% if r.scene_label %}<span>&middot; scene {{ r.scene_label }}</span>{% endif %}
</div> </div>
<div class="search-result-summary">{{ r.pov_summary }}</div> {# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with
each match wrapped in ``<mark>...</mark>``. ``|safe`` is
required so the marker tags survive Jinja's auto-escape.
NOTE(review): ``snippet()`` copies the indexed text verbatim and
does not HTML-escape it, and ``|safe`` disables auto-escape for
the whole string, so any ``<``/``&`` in the source content is
emitted as markup rather than as literal text — confirm the
content is trusted, or escape it before the markers are applied.
This is the only ``|safe`` filter on the page — chat_id,
owner_name, etc. remain auto-escaped. #}
<div class="search-result-summary">{{ r.snippet|safe }}</div>
</a> </a>
</li> </li>
{% endfor %} {% endfor %}
+15
View File
@@ -193,6 +193,13 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
chat.get("narrative_anchor") if chat else None chat.get("narrative_anchor") if chat else None
), ),
"scene_id": row["scene_id"], "scene_id": row["scene_id"],
# T111.2: event_id deep-links to the originating turn
# via the ``id="turn-{event_id}"`` anchor that Phase 3.5
# T86 stamps on each turn DOM node. May be ``None`` for
# memory rows projected before the 0014 migration ran
# (T109 did not backfill historical rows); the template
# falls back to a chat-level link in that case.
"event_id": row["event_id"],
# Scenes have no ``title`` column today; surface the # Scenes have no ``title`` column today; surface the
# ``started_at`` timestamp as a human-friendly label # ``started_at`` timestamp as a human-friendly label
# when a scene is set, otherwise leave it blank. # when a scene is set, otherwise leave it blank.
@@ -200,6 +207,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
scene.get("started_at") if scene else None scene.get("started_at") if scene else None
), ),
"pov_summary": row["pov_summary"], "pov_summary": row["pov_summary"],
# T111.1: ``snippet`` is the FTS5 windowed excerpt with
# ``<mark>`` tags around each match. Falls back to the
# full ``pov_summary`` if the row lacks a snippet (which
# shouldn't happen on this code path because every
# ``raw_results`` row came from a MATCH query, but we
# guard defensively so the template never renders
# ``None``).
"snippet": row.get("snippet") or row["pov_summary"],
"significance": row["significance"], "significance": row["significance"],
"ts": row["ts"], "ts": row["ts"],
} }
+8 -6
View File
@@ -867,12 +867,14 @@ def test_cross_chat_search_surfaces_memories_in_three_chats(
assert response.status_code == 200 assert response.status_code == 200
body = response.text body = response.text
# Each chat_id appears in a result link href, e.g. # Each chat_id appears in a result link href. T111.2 deep-links to
# ``href="/chats/chat_bot_a"``. The template renders one # the originating turn so the href is now
# ``<a class="search-result-link" href="/chats/{chat_id}">`` per # ``href="/chats/{chat_id}#turn-{event_id}"``; we assert on the
# row, so a substring match per chat is sufficient. # ``"/chats/{chat_id}#turn-`` prefix so the per-chat link is
# uniquely matched (a bare ``"/chats/chat_bot_a`` substring would
# also match ``chat_bot_a_2`` / ``chat_bot_a_3``).
for chat_id in chat_ids: for chat_id in chat_ids:
assert f'href="/chats/{chat_id}"' in body, ( assert f'href="/chats/{chat_id}#turn-' in body, (
f"chat {chat_id} missing from /search results: {body!r}" f"chat {chat_id} missing from /search results: {body!r}"
) )
# The owner display name (BotA) renders for each row — verify >= 3 # The owner display name (BotA) renders for each row — verify >= 3
@@ -888,4 +890,4 @@ def test_cross_chat_search_surfaces_memories_in_three_chats(
# The "no matches" empty-state copy fires. # The "no matches" empty-state copy fires.
assert "No matches" in distractor_body assert "No matches" in distractor_body
for chat_id in chat_ids: for chat_id in chat_ids:
assert f'href="/chats/{chat_id}"' not in distractor_body assert f'href="/chats/{chat_id}#turn-' not in distractor_body
+42 -4
View File
@@ -127,13 +127,51 @@ def test_empty_query_renders_placeholder_not_results(client, tmp_path):
def test_result_links_navigate_to_chat(client, tmp_path): def test_result_links_navigate_to_chat(client, tmp_path):
"""Each result links back to its originating chat so the user can """Each result links back to its originating chat so the user can
reopen the thread where the memory was first witnessed.""" reopen the thread where the memory was first witnessed.
Post-T111.2: the link now includes a turn anchor when the memory
row carries an ``event_id`` (T109's nullable column is populated for
rows projected after migration 0014 ran). We assert on the chat-id
portion of the href because the exact event id is autoincrement and
depends on seed order; the dedicated
``test_search_result_link_includes_turn_anchor`` test below pins the
anchor format itself."""
_seed_two_chats_with_memories(tmp_path / "test.db") _seed_two_chats_with_memories(tmp_path / "test.db")
resp = client.get("/search?q=rabbit") resp = client.get("/search?q=rabbit")
assert resp.status_code == 200 assert resp.status_code == 200
# The link target is chat-level (memories don't carry an event_id assert 'href="/chats/chat_a' in resp.text
# column today, so we don't deep-link to a specific turn).
assert 'href="/chats/chat_a"' in resp.text
def test_search_results_include_fts_snippet_with_highlight(client, tmp_path):
    """T111.1: a matched term is rendered inside ``<mark>...</mark>``.

    Seeds the two-chat fixture database, searches for ``rabbit`` and
    checks that the response body contains the highlighted token. Only
    the marker itself is asserted, not the surrounding excerpt, so the
    test does not depend on how much of the summary the snippet window
    keeps.
    """
    db_path = tmp_path / "test.db"
    _seed_two_chats_with_memories(db_path)

    response = client.get("/search?q=rabbit")

    assert response.status_code == 200
    highlighted = "<mark>rabbit</mark>"
    assert highlighted in response.text
def test_search_result_link_includes_turn_anchor(client, tmp_path):
    """T111.2: result links deep-link to the originating turn.

    The chat page stamps ``id="turn-{event_id}"`` on each turn (Phase
    3.5 T86), and the search template builds each result link as
    ``/chats/{chat_id}#turn-{event_id}`` when the memory row carries an
    ``event_id`` (T109).

    The concrete event id is an autoincrement that depends on seed
    order, so it is not pinned. What is pinned is the anchor *format*:
    the link for ``chat_a`` must be a complete ``href`` attribute whose
    fragment is ``turn-`` followed by at least one digit. A bare
    ``"#turn-"`` substring check would also accept an empty or
    non-numeric id, or a match outside of an ``href``.
    """
    # Local import keeps the test self-contained regardless of what the
    # module already imports at the top of the file.
    import re

    _seed_two_chats_with_memories(tmp_path / "test.db")
    resp = client.get("/search?q=rabbit")
    assert resp.status_code == 200
    # Closing quote in the pattern ensures the id is the whole fragment
    # and that ``chat_a`` is the whole chat id (not a longer prefix match).
    assert re.search(r'href="/chats/chat_a#turn-\d+"', resp.text), (
        f"no turn-anchored link for chat_a in /search results: {resp.text!r}"
    )
def test_search_results_use_batched_lookups(client, tmp_path): def test_search_results_use_batched_lookups(client, tmp_path):