chore: memory.py DRY MAX(id) helper + document fts_rank=None contract (T104)
This commit is contained in:
+29
-8
@@ -112,6 +112,25 @@ SIGNIFICANCE_RANK_BIAS = 0.5
|
||||
RRF_CONST = 60
|
||||
|
||||
|
||||
def _max_event_id(conn: Connection, owner_id: str) -> int:
|
||||
"""Return the largest ``memories.id`` for ``owner_id`` (1 if none exist).
|
||||
|
||||
Used as the recency-boost denominator by both ``_composite_rerank`` and
|
||||
``_rrf_fuse_and_rerank`` (T104). The row id is a monotonic recency proxy
|
||||
— newer memories have larger ids — so dividing by the per-owner max keeps
|
||||
the boost in [0, 1] regardless of how many memories the owner has.
|
||||
|
||||
Returns 1 (not 0) when the owner has no rows so callers can divide by
|
||||
the result without a guard. The "no memories" case never actually hits
|
||||
this helper because the FTS query above would have returned no rows,
|
||||
but the safe default keeps the helper trivially reusable.
|
||||
"""
|
||||
row = conn.execute(
|
||||
"SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
|
||||
).fetchone()
|
||||
return row[0] if row and row[0] else 1
|
||||
|
||||
|
||||
def search_memories(
|
||||
conn: Connection,
|
||||
owner_id: str,
|
||||
@@ -163,6 +182,14 @@ def search_memories(
|
||||
|
||||
When ``query_vector`` is None: FTS-only behaviour unchanged — all
|
||||
Phase 1-3.5 callers see the same row shape and ordering as before.
|
||||
|
||||
**Row-shape contract (T104):** every returned dict carries an
|
||||
``fts_rank`` key. For FTS hits this is the BM25 score (a negative float,
|
||||
lower-is-better). For *vector-only* hits surfaced by the fused path —
|
||||
rows that matched the query embedding but did NOT match FTS — the
|
||||
``fts_rank`` value is ``None``. Downstream consumers must accept
|
||||
``None`` here; do not assume ``fts_rank`` is always numeric. The
|
||||
``composite_score`` is always a float on every returned row.
|
||||
"""
|
||||
if witness_role not in _VALID_WITNESS_ROLES:
|
||||
raise ValueError(
|
||||
@@ -227,10 +254,7 @@ def _composite_rerank(
|
||||
Extracted from ``search_memories`` so the no-vector path stays a single
|
||||
call and the fused path can re-use the same boost formulae after RRF.
|
||||
"""
|
||||
max_id_row = conn.execute(
|
||||
"SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
|
||||
).fetchone()
|
||||
max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
|
||||
max_id = _max_event_id(conn, owner_id)
|
||||
|
||||
result_cols = cols + ["fts_rank"]
|
||||
enriched: list[dict] = []
|
||||
@@ -343,10 +367,7 @@ def _rrf_fuse_and_rerank(
|
||||
|
||||
# Final composite re-rank: significance + recency boosts on top of the
|
||||
# negated fusion score so the sort direction matches the FTS-only path.
|
||||
max_id_row = conn.execute(
|
||||
"SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
|
||||
).fetchone()
|
||||
max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
|
||||
max_id = _max_event_id(conn, owner_id)
|
||||
|
||||
result_cols = cols + ["fts_rank"]
|
||||
enriched: list[dict] = []
|
||||
|
||||
Reference in New Issue
Block a user