chore: memory.py DRY MAX(id) helper + document fts_rank=None contract (T104)

This commit is contained in:
Joseph Doherty
2026-04-27 04:34:28 -04:00
parent a06f90a164
commit b65e1e1098
+29 -8
View File
@@ -112,6 +112,25 @@ SIGNIFICANCE_RANK_BIAS = 0.5
RRF_CONST = 60
def _max_event_id(conn: Connection, owner_id: str) -> int:
    """Return the largest ``memories.id`` for ``owner_id``, never less than 1.

    Used as the recency-boost denominator by both ``_composite_rerank`` and
    ``_rrf_fuse_and_rerank`` (T104). The row id is a monotonic recency proxy
    — newer memories have larger ids — so dividing a row's id by the
    per-owner max keeps the boost in [0, 1] regardless of how many memories
    the owner has.

    Args:
        conn: Open database connection exposing a ``memories`` table with
            ``id`` and ``owner_id`` columns.
        owner_id: Owner whose memories are considered; rows belonging to
            other owners never influence the result.

    Returns:
        The maximum ``id`` among the owner's rows, or 1 when the owner has
        no rows (``MAX`` over an empty set is NULL) or when the maximum is
        not a positive number. The floor of 1 lets callers divide by the
        result without a zero or sign guard.

    The rerank callers are only expected to reach this helper after a
    search has produced at least one row, so the fallback is defensive; it
    keeps the helper safe to reuse from any other call site.
    """
    row = conn.execute(
        "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
    ).fetchone()
    max_id = row[0] if row else None
    # An explicit check instead of truthiness: NULL and 0 fall back as
    # before, and a negative maximum can no longer leak out as a negative
    # denominator.
    if max_id is None or max_id < 1:
        return 1
    return max_id
def search_memories(
conn: Connection,
owner_id: str,
@@ -163,6 +182,14 @@ def search_memories(
When ``query_vector`` is None: FTS-only behaviour unchanged — all
Phase 1-3.5 callers see the same row shape and ordering as before.
**Row-shape contract (T104):** every returned dict carries an
``fts_rank`` key. For FTS hits this is the BM25 score (a negative float,
lower-is-better). For *vector-only* hits surfaced by the fused path —
rows that matched the query embedding but did NOT match FTS — the
``fts_rank`` value is ``None``. Downstream consumers must accept
``None`` here; do not assume ``fts_rank`` is always numeric. The
``composite_score`` is always a float on every returned row.
"""
if witness_role not in _VALID_WITNESS_ROLES:
raise ValueError(
@@ -227,10 +254,7 @@ def _composite_rerank(
Extracted from ``search_memories`` so the no-vector path stays a single
call and the fused path can re-use the same boost formulae after RRF.
"""
max_id_row = conn.execute(
"SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
).fetchone()
max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
max_id = _max_event_id(conn, owner_id)
result_cols = cols + ["fts_rank"]
enriched: list[dict] = []
@@ -343,10 +367,7 @@ def _rrf_fuse_and_rerank(
# Final composite re-rank: significance + recency boosts on top of the
# negated fusion score so the sort direction matches the FTS-only path.
max_id_row = conn.execute(
"SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
).fetchone()
max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
max_id = _max_event_id(conn, owner_id)
result_cols = cols + ["fts_rank"]
enriched: list[dict] = []