merge: T111 search UX (FTS snippet + turn deep-link)

This commit is contained in:
Joseph Doherty
2026-04-27 05:42:48 -04:00
5 changed files with 118 additions and 20 deletions
+36 -8
View File
@@ -26,13 +26,28 @@ def search_all_memories(
"""Search FTS5 across all owners and chats.
Returns rows with ``{memory_id, owner_id, chat_id, scene_id,
pov_summary, significance, ts, fts_rank}``, sorted by FTS5 BM25
rank ascending (lower rank = stronger match, surfaced first).
event_id, pov_summary, snippet, significance, ts, fts_rank}``,
sorted by FTS5 BM25 rank ascending (lower rank = stronger match,
surfaced first).
``event_id`` (T111.2 / T109) is the id of the ``event_log`` row that
drove the projecting ``memory_written`` event. May be ``None`` for
memory rows projected before the 0014 schema migration ran (the
column is nullable on purpose; T109 did not backfill historical
rows). The search-results UI uses it to deep-link to the originating
turn anchor (Phase 3.5 T86 stamps ``id="turn-{event_id}"`` on each
turn DOM node) and falls back to a chat-level link when ``None``.
The ``memories`` table has no ``ts`` column; we expose ``created_at``
(the projector-side row insertion timestamp) under that key so the
UI does not have to know the storage name.
``snippet`` (T111.1) is the FTS5 ``snippet()`` output for the
matched ``pov_summary`` column: a windowed excerpt with each match
token wrapped in ``<mark>...</mark>`` for the search-results UI to
render verbatim. The full ``pov_summary`` is also returned so
non-highlighted callers (or fallbacks) keep the original string.
An empty / whitespace-only ``query`` short-circuits to ``[]`` to
avoid an FTS5 ``MATCH ''`` syntax error and to keep the top-bar
"no input yet" state from triggering a full-table scan.
@@ -45,9 +60,20 @@ def search_all_memories(
# from the content table because the FTS index only stores
# ``pov_summary``. ORDER BY rank ASC because BM25 in FTS5 returns
# negative scores where lower is better.
#
# ``snippet(memories_fts, 0, ...)`` (T111.1) targets column 0 of the
# FTS virtual table, which is ``pov_summary`` (the only column
# indexed by ``CREATE VIRTUAL TABLE memories_fts USING fts5(
# pov_summary, ...)`` in migration 0006). SQLite passes the raw
# column text through verbatim aside from inserting the configured
# before/after match markers; it performs no HTML escaping. Any
# markup already present in ``pov_summary`` therefore survives into
# the snippet alongside the ``<mark>`` tags we inject.
# NOTE(review): that makes the output unsafe to render with ``|safe``
# as-is — HTML-escape the text around the markers before trusting it.
rows = conn.execute(
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
" m.pov_summary, m.significance, m.created_at, "
"SELECT m.id, m.owner_id, m.chat_id, m.scene_id, m.event_id, "
" m.pov_summary, "
" snippet(memories_fts, 0, '<mark>', '</mark>', '', 32) "
" AS snippet, "
" m.significance, m.created_at, "
" memories_fts.rank "
"FROM memories_fts "
"JOIN memories m ON m.id = memories_fts.rowid "
@@ -63,10 +89,12 @@ def search_all_memories(
"owner_id": r[1],
"chat_id": r[2],
"scene_id": r[3],
"pov_summary": r[4],
"significance": r[5],
"ts": r[6],
"fts_rank": r[7],
"event_id": r[4],
"pov_summary": r[5],
"snippet": r[6],
"significance": r[7],
"ts": r[8],
"fts_rank": r[9],
}
for r in rows
]
+17 -2
View File
@@ -21,14 +21,29 @@
<ul class="search-results">
{% for r in results %}
<li class="search-result">
<a class="search-result-link" href="/chats/{{ r.chat_id }}">
{# T111.2: deep-link to the originating turn via the
``id="turn-{event_id}"`` anchor stamped by Phase 3.5 T86.
``event_id`` may be NULL for memory rows projected before the
0014 migration ran (T109 did not backfill historical rows); in
that case fall back to a chat-level link with no anchor so we
never emit ``#turn-None``. #}
<a class="search-result-link"
href="/chats/{{ r.chat_id }}{% if r.event_id %}#turn-{{ r.event_id }}{% endif %}">
<div class="search-result-meta muted">
<strong>{{ r.owner_name }}</strong>
<span>&middot; {{ r.chat_id }}</span>
{% if r.chat_name %}<span>&middot; {{ r.chat_name }}</span>{% endif %}
{% if r.scene_label %}<span>&middot; scene {{ r.scene_label }}</span>{% endif %}
</div>
<div class="search-result-summary">{{ r.pov_summary }}</div>
{# T111.1: ``r.snippet`` is the FTS5 ``snippet()`` excerpt with
each match wrapped in ``<mark>...</mark>``. ``|safe`` is
required so the marker tags survive Jinja's auto-escape.
NOTE(review): SQLite builds the snippet from the indexed text
verbatim and does not HTML-escape it, so any markup in the
source content is emitted as live HTML here, not as literal
text. Escape the snippet (keeping only the ``<mark>`` tags)
before it reaches this filter. This is the only ``|safe`` filter
on the page — chat_id, owner_name, etc. remain auto-escaped. #}
<div class="search-result-summary">{{ r.snippet|safe }}</div>
</a>
</li>
{% endfor %}
+15
View File
@@ -193,6 +193,13 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
chat.get("narrative_anchor") if chat else None
),
"scene_id": row["scene_id"],
# T111.2: event_id deep-links to the originating turn
# via the ``id="turn-{event_id}"`` anchor that Phase 3.5
# T86 stamps on each turn DOM node. May be ``None`` for
# memory rows projected before the 0014 migration ran
# (T109 did not backfill historical rows); the template
# falls back to a chat-level link in that case.
"event_id": row["event_id"],
# Scenes have no ``title`` column today; surface the
# ``started_at`` timestamp as a human-friendly label
# when a scene is set, otherwise leave it blank.
@@ -200,6 +207,14 @@ async def search(request: Request, q: str = "", conn=Depends(get_conn)):
scene.get("started_at") if scene else None
),
"pov_summary": row["pov_summary"],
# T111.1: ``snippet`` is the FTS5 windowed excerpt with
# ``<mark>`` tags around each match. Falls back to the
# full ``pov_summary`` if the row lacks a snippet (which
# shouldn't happen on this code path because every
# ``raw_results`` row came from a MATCH query, but we
# guard defensively so the template never renders
# ``None``).
"snippet": row.get("snippet") or row["pov_summary"],
"significance": row["significance"],
"ts": row["ts"],
}
+8 -6
View File
@@ -867,12 +867,14 @@ def test_cross_chat_search_surfaces_memories_in_three_chats(
assert response.status_code == 200
body = response.text
# Each chat_id appears in a result link href, e.g.
# ``href="/chats/chat_bot_a"``. The template renders one
# ``<a class="search-result-link" href="/chats/{chat_id}">`` per
# row, so a substring match per chat is sufficient.
# Each chat_id appears in a result link href. T111.2 deep-links to
# the originating turn so the href is now
# ``href="/chats/{chat_id}#turn-{event_id}"``; we assert on the
# ``"/chats/{chat_id}#turn-`` prefix so the per-chat link is
# uniquely matched (a bare ``"/chats/chat_bot_a`` substring would
# also match ``chat_bot_a_2`` / ``chat_bot_a_3``).
for chat_id in chat_ids:
assert f'href="/chats/{chat_id}"' in body, (
assert f'href="/chats/{chat_id}#turn-' in body, (
f"chat {chat_id} missing from /search results: {body!r}"
)
# The owner display name (BotA) renders for each row — verify >= 3
@@ -888,4 +890,4 @@ def test_cross_chat_search_surfaces_memories_in_three_chats(
# The "no matches" empty-state copy fires.
assert "No matches" in distractor_body
for chat_id in chat_ids:
assert f'href="/chats/{chat_id}"' not in distractor_body
assert f'href="/chats/{chat_id}#turn-' not in distractor_body
+42 -4
View File
@@ -127,13 +127,51 @@ def test_empty_query_renders_placeholder_not_results(client, tmp_path):
def test_result_links_navigate_to_chat(client, tmp_path):
"""Each result links back to its originating chat so the user can
reopen the thread where the memory was first witnessed."""
reopen the thread where the memory was first witnessed.
Post-T111.2: the link now includes a turn anchor when the memory
row carries an ``event_id`` (T109's nullable column is populated for
rows projected after migration 0014 ran). We assert on the chat-id
portion of the href because the exact event id is autoincrement and
depends on seed order; the dedicated
``test_search_result_link_includes_turn_anchor`` test below pins the
anchor format itself."""
_seed_two_chats_with_memories(tmp_path / "test.db")
resp = client.get("/search?q=rabbit")
assert resp.status_code == 200
# The link target is chat-level (memories don't carry an event_id
# column today, so we don't deep-link to a specific turn).
assert 'href="/chats/chat_a"' in resp.text
assert 'href="/chats/chat_a' in resp.text
def test_search_results_include_fts_snippet_with_highlight(client, tmp_path):
    """T111.1: the search page highlights the matched term with ``<mark>``.

    The seed helper stores a ``pov_summary`` of
    ``the rabbit darted across chat_a``. A search for ``rabbit`` should
    come back with that token wrapped by the FTS5 ``snippet()`` markers,
    i.e. ``<mark>rabbit</mark>``.

    Only the presence of the marker is asserted: the excerpt around it
    may be truncated with an ellipsis when the indexed text is longer
    than the configured token window.
    """
    db_path = tmp_path / "test.db"
    _seed_two_chats_with_memories(db_path)

    response = client.get("/search?q=rabbit")

    assert response.status_code == 200
    highlighted = "<mark>rabbit</mark>"
    assert highlighted in response.text
def test_search_result_link_includes_turn_anchor(client, tmp_path):
    """T111.2: result links deep-link to the originating turn via the
    chat-page anchor stamped by Phase 3.5 T86 (``id="turn-{event_id}"``).

    The seeded ``memory_written`` events are projected with
    ``memories.event_id`` populated (T109); the route exposes that id and
    the template builds the link as ``/chats/{chat_id}#turn-{event_id}``.

    We don't assert a specific event id (it's an autoincrement that
    depends on seed order), but we do require a non-empty numeric id
    inside the ``href`` attribute. A bare ``#turn-`` substring check
    would also pass on ``#turn-None`` or an empty anchor, which is
    exactly the regression this test exists to catch.
    NOTE(review): assumes event ids render as decimal integers — confirm
    against the ``event_log`` schema.
    """
    # Local import keeps this block self-contained; the module's import
    # header is not visible from here.
    import re

    _seed_two_chats_with_memories(tmp_path / "test.db")
    resp = client.get("/search?q=rabbit")
    assert resp.status_code == 200
    assert re.search(r'href="/chats/chat_a#turn-\d+"', resp.text), (
        f"no turn-anchored link for chat_a in /search results: {resp.text!r}"
    )
def test_search_results_use_batched_lookups(client, tmp_path):