merge: T57 significance-aware retrieval ranking
This commit is contained in:
+15
-2
@@ -94,6 +94,14 @@ def get_pinned(conn: Connection, owner_id: str) -> list[dict]:
|
|||||||
_SIGNIFICANCE_WEIGHT = 0.3
|
_SIGNIFICANCE_WEIGHT = 0.3
|
||||||
_RECENCY_WEIGHT = 0.5
|
_RECENCY_WEIGHT = 0.5
|
||||||
|
|
||||||
|
# T57 (Phase 3, §11.1): significance multiplier applied to the SQL ORDER BY in
|
||||||
|
# ``search_memories`` so that the FTS over-fetch already prefers
|
||||||
|
# higher-significance rows for tied / near-tied BM25 ranks. Module-level so it
|
||||||
|
# can be tuned without a code change. BM25 ``rank`` is lower-is-better, so the
|
||||||
|
# bias is *subtracted* from rank in the ASC ordering — the additive analogue of multiplying
|
||||||
|
# a higher-is-better score by a positive constant per the spec wording.
|
||||||
|
SIGNIFICANCE_RANK_BIAS = 0.5
|
||||||
|
|
||||||
|
|
||||||
def search_memories(
|
def search_memories(
|
||||||
conn: Connection,
|
conn: Connection,
|
||||||
@@ -137,10 +145,15 @@ def search_memories(
|
|||||||
"JOIN memories m ON m.id = memories_fts.rowid "
|
"JOIN memories m ON m.id = memories_fts.rowid "
|
||||||
f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
|
f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
|
||||||
"AND memories_fts MATCH ? "
|
"AND memories_fts MATCH ? "
|
||||||
"ORDER BY memories_fts.rank "
|
# T57: significance multiplier biases the FTS over-fetch order. BM25
|
||||||
|
# ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
|
||||||
|
# surfaces higher-significance rows above lower-significance rows with
|
||||||
|
# equal/near-equal match strength. Additive analogue of ``score × constant``
|
||||||
|
# per §11.1 once the rank is inverted to a higher-is-better score.
|
||||||
|
"ORDER BY (memories_fts.rank - m.significance * ?) ASC "
|
||||||
"LIMIT ?"
|
"LIMIT ?"
|
||||||
)
|
)
|
||||||
cur = conn.execute(sql, (owner_id, query, over_fetch))
|
cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
|
||||||
rows = cur.fetchall()
|
rows = cur.fetchall()
|
||||||
if not rows:
|
if not rows:
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -125,3 +125,37 @@ def test_search_invalid_witness_role_raises(tmp_path):
|
|||||||
with open_db(db) as conn:
|
with open_db(db) as conn:
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
search_memories(conn, "bot_a", "invalid_role", "anything", k=4)
|
search_memories(conn, "bot_a", "invalid_role", "anything", k=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_higher_significance_outranks_equal_rank(tmp_path):
    """T57: the significance bias in the SQL ORDER BY breaks BM25 ties.

    Both seeded memories carry the same FTS-matching text, so their BM25
    ranks are (effectively) equal and only ``significance`` differs. The
    search must therefore return the higher-significance row first.
    """
    db = tmp_path / "t.db"
    # Identical pov_summary on every row -> the FTS rank cannot separate them;
    # rows are seeded lowest significance first.
    specs = [
        {"pov_summary": "she swore an oath", "significance": level}
        for level in (0, 3)
    ]
    _seed(db, memory_specs=specs)
    with open_db(db) as conn:
        out = search_memories(conn, "bot_a", "host", "oath", k=5)
        assert len(out) == 2
        top, runner_up = out
        # Higher significance wins despite the tied FTS rank.
        assert top["significance"] == 3
        assert runner_up["significance"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_significance_bias_is_constant_module_level():
    """T57: pin ``SIGNIFICANCE_RANK_BIAS`` as a tunable module-level numeric.

    Guards the type and sign of the constant so that a later tuning change
    cannot silently turn it into a non-number or flip the ranking direction.
    """
    from chat.state.memory import SIGNIFICANCE_RANK_BIAS

    assert isinstance(SIGNIFICANCE_RANK_BIAS, (int, float))
    # ``bool`` is a subclass of ``int`` and would satisfy the check above;
    # a True/False "bias" is a configuration mistake, not a numeric weight.
    assert not isinstance(SIGNIFICANCE_RANK_BIAS, bool)
    # Must be non-negative -- a negative bias would invert the desired
    # "higher significance ranks higher" semantics.
    assert SIGNIFICANCE_RANK_BIAS >= 0
|
||||||
|
|||||||
Reference in New Issue
Block a user