merge: T57 significance-aware retrieval ranking

2026-04-26 20:21:01 -04:00
parent 88350d7d2e 5e6b29e0c5
commit 8aeadfd0e4
2 changed files with 49 additions and 2 deletions
@@ -94,6 +94,14 @@ def get_pinned(conn: Connection, owner_id: str) -> list[dict]:
 _SIGNIFICANCE_WEIGHT = 0.3
 _RECENCY_WEIGHT = 0.5

+# T57 (Phase 3, §11.1): significance multiplier applied to the SQL ORDER BY in
+# ``search_memories`` so that the FTS over-fetch already prefers
+# higher-significance rows for tied / near-tied BM25 ranks. Module-level so it
+# can be tuned without a code change. BM25 ``rank`` is lower-is-better, so the
+# bias is *subtracted* from rank in the ASC ordering — equivalent to multiplying
+# a higher-is-better score by a positive constant per the spec wording.
+SIGNIFICANCE_RANK_BIAS = 0.5
+

 def search_memories(
    conn: Connection,
@@ -137,10 +145,15 @@ def search_memories(
        "JOIN memories m ON m.id = memories_fts.rowid "
        f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
        "AND memories_fts MATCH ? "
-        "ORDER BY memories_fts.rank "
+        # T57: significance multiplier biases the FTS over-fetch order. BM25
+        # ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
+        # surfaces higher-significance rows above lower-significance rows with
+        # equal/near-equal match strength. Equivalent to ``score × constant``
+        # per §11.1 once the rank is inverted to a higher-is-better score.
+        "ORDER BY (memories_fts.rank - m.significance * ?) ASC "
        "LIMIT ?"
    )
-    cur = conn.execute(sql, (owner_id, query, over_fetch))
+    cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
    rows = cur.fetchall()
    if not rows:
        return []
@@ -125,3 +125,37 @@ def test_search_invalid_witness_role_raises(tmp_path):
    with open_db(db) as conn:
        with pytest.raises(ValueError):
            search_memories(conn, "bot_a", "invalid_role", "anything", k=4)
+
+
+def test_higher_significance_outranks_equal_rank(tmp_path):
+    """T57: significance multiplier biases the SQL ORDER BY.
+
+    Two memories with IDENTICAL FTS-matching text yield (effectively) equal
+    BM25 ranks. The significance bias applied in the SQL ORDER BY must
+    surface the higher-significance row first.
+    """
+    db = tmp_path / "t.db"
+    _seed(
+        db,
+        memory_specs=[
+            # Identical pov_summary text -> FTS BM25 rank is the same for both.
+            {"pov_summary": "she swore an oath", "significance": 0},
+            {"pov_summary": "she swore an oath", "significance": 3},
+        ],
+    )
+    with open_db(db) as conn:
+        out = search_memories(conn, "bot_a", "host", "oath", k=5)
+        assert len(out) == 2
+        # Higher significance wins despite tied FTS rank.
+        assert out[0]["significance"] == 3
+        assert out[1]["significance"] == 0
+
+
+def test_significance_bias_is_constant_module_level():
+    """T57: pin ``SIGNIFICANCE_RANK_BIAS`` as a tunable module-level numeric."""
+    from chat.state.memory import SIGNIFICANCE_RANK_BIAS
+
+    assert isinstance(SIGNIFICANCE_RANK_BIAS, (int, float))
+    # Must be non-negative -- a negative bias would invert the desired
+    # "higher significance ranks higher" semantics.
+    assert SIGNIFICANCE_RANK_BIAS >= 0