diff --git a/chat/state/memory.py b/chat/state/memory.py
index 42a7e95..a9d62df 100644
--- a/chat/state/memory.py
+++ b/chat/state/memory.py
@@ -112,6 +112,25 @@ SIGNIFICANCE_RANK_BIAS = 0.5
 RRF_CONST = 60
 
 
+def _max_event_id(conn: Connection, owner_id: str) -> int:
+    """Return the largest ``memories.id`` for ``owner_id`` (1 if none exist).
+
+    Used as the recency-boost denominator by both ``_composite_rerank`` and
+    ``_rrf_fuse_and_rerank`` (T104). The row id is a monotonic recency proxy
+    — newer memories have larger ids — so dividing by the per-owner max keeps
+    the boost in [0, 1] regardless of how many memories the owner has.
+
+    Returns 1 (not 0) when the owner has no rows so callers can divide by
+    the result without a guard. The "no memories" case is not expected to
+    reach this helper because the caller's FTS query would already have
+    returned no rows, but the safe default keeps the helper reusable.
+    """
+    row = conn.execute(
+        "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
+    ).fetchone()
+    return row[0] if row and row[0] else 1
+
+
 def search_memories(
     conn: Connection,
     owner_id: str,
@@ -163,6 +182,14 @@ def search_memories(
 
     When ``query_vector`` is None: FTS-only behaviour unchanged — all
     Phase 1-3.5 callers see the same row shape and ordering as before.
+
+    **Row-shape contract (T104):** every returned dict carries an
+    ``fts_rank`` key. For FTS hits this is the BM25 score (a negative float,
+    lower-is-better). For *vector-only* hits surfaced by the fused path —
+    rows that matched the query embedding but did NOT match FTS — the
+    ``fts_rank`` value is ``None``. Downstream consumers must accept
+    ``None`` here; do not assume ``fts_rank`` is always numeric. The
+    ``composite_score`` is always a float on every returned row.
     """
     if witness_role not in _VALID_WITNESS_ROLES:
         raise ValueError(
@@ -227,10 +254,7 @@ def _composite_rerank(
     Extracted from ``search_memories`` so the no-vector path stays a single
     call and the fused path can re-use the same boost formulae after RRF.
     """
-    max_id_row = conn.execute(
-        "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
-    ).fetchone()
-    max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
+    max_id = _max_event_id(conn, owner_id)
 
     result_cols = cols + ["fts_rank"]
     enriched: list[dict] = []
@@ -343,10 +367,7 @@ def _rrf_fuse_and_rerank(
 
     # Final composite re-rank: significance + recency boosts on top of the
     # negated fusion score so the sort direction matches the FTS-only path.
-    max_id_row = conn.execute(
-        "SELECT MAX(id) FROM memories WHERE owner_id = ?", (owner_id,)
-    ).fetchone()
-    max_id = max_id_row[0] if max_id_row and max_id_row[0] else 1
+    max_id = _max_event_id(conn, owner_id)
 
     result_cols = cols + ["fts_rank"]
     enriched: list[dict] = []