Files
chat/chat/services/cross_chat_search.py
T
2026-04-27 02:31:31 -04:00

76 lines
2.5 KiB
Python

"""Cross-chat search service (T93, Phase 4).
FTS5-based search across ALL owners and ALL chats. Used by the
top-bar search UX (T100) for "where did I last see this character
mention X?" queries. NO witness filter -- this is intentionally a
power-user surface that surfaces memories across POVs.
Mirrors the FTS5 access pattern of ``chat.state.memory.search_memories``
but drops both the ``owner_id = ?`` and the per-witness predicates so a
single query can sweep every chat in the database. The composite
re-rank is also dropped: callers want raw BM25 ordering for the
"highest match strength wins" semantics expected of a global search box.
"""
from __future__ import annotations
from sqlite3 import Connection
def search_all_memories(
    conn: Connection,
    *,
    query: str,
    k: int = 20,
) -> list[dict]:
    """Search FTS5 across all owners and chats.

    Runs ``query`` against the ``memories_fts`` virtual table and joins
    each hit back to ``memories`` for its metadata. No ``owner_id`` or
    witness predicate is applied.

    Args:
        conn: Open SQLite connection holding the ``memories`` table and
            the ``memories_fts`` FTS5 index.
        query: Search-box text. Leading/trailing whitespace is stripped.
            It is first tried verbatim as an FTS5 query expression, so
            operators such as ``OR`` or a trailing ``*`` keep working.
            If SQLite rejects it (e.g. ``well-known`` or an unbalanced
            ``"``), it is retried as plain text: every
            whitespace-separated term is quoted as an FTS5 string, and
            all terms must match.
        k: Maximum number of rows, bound directly to ``LIMIT``. Note
            that SQLite treats a negative ``LIMIT`` as "no upper bound".

    Returns:
        Rows as ``{memory_id, owner_id, chat_id, scene_id, pov_summary,
        significance, ts, fts_rank}``, sorted by FTS5 BM25 rank
        ascending (lower rank = stronger match, surfaced first).

        The ``memories`` table has no ``ts`` column; ``created_at`` is
        exposed under that key so the UI does not have to know the
        storage name.

        An empty / whitespace-only ``query`` short-circuits to ``[]``
        to avoid an FTS5 ``MATCH ''`` syntax error and to keep the
        top-bar "no input yet" state from hitting the database at all.

    Raises:
        sqlite3.OperationalError: If the statement fails both verbatim
            and with the quoted fallback (for example because the
            tables do not exist). The error from the first attempt is
            the one raised.
    """
    # Imported here because the module itself only imports ``Connection``.
    from sqlite3 import OperationalError

    needle = query.strip() if query else ""
    if not needle:
        return []

    # The JOIN pulls metadata from the content table because the select
    # list needs columns that are read from ``memories``. ORDER BY rank
    # ASC because BM25 in FTS5 returns negative scores where lower is
    # better.
    sql = (
        "SELECT m.id, m.owner_id, m.chat_id, m.scene_id, "
        " m.pov_summary, m.significance, m.created_at, "
        " memories_fts.rank "
        "FROM memories_fts "
        "JOIN memories m ON m.id = memories_fts.rowid "
        "WHERE memories_fts MATCH ? "
        "ORDER BY memories_fts.rank ASC "
        "LIMIT ?"
    )

    try:
        rows = conn.execute(sql, (needle, k)).fetchall()
    except OperationalError as first_error:
        # Free-form input is not always valid FTS5 syntax. Quote each
        # term (doubling embedded quotes) so it is matched as literal
        # text instead of being parsed as operators or column filters.
        literal = " ".join(
            '"' + term.replace('"', '""') + '"' for term in needle.split()
        )
        try:
            rows = conn.execute(sql, (literal, k)).fetchall()
        except OperationalError:
            # Quoting did not help, so this was not a query-syntax
            # problem; report the original failure unchanged.
            raise first_error from None

    # Output keys, in the same order as the SELECT list above.
    keys = (
        "memory_id",
        "owner_id",
        "chat_id",
        "scene_id",
        "pov_summary",
        "significance",
        "ts",
        "fts_rank",
    )
    return [dict(zip(keys, row)) for row in rows]
# Public API: the only name exported by ``from <module> import *``.
__all__ = ["search_all_memories"]