diff --git a/chat/state/memory.py b/chat/state/memory.py
index 0426067..0eda418 100644
--- a/chat/state/memory.py
+++ b/chat/state/memory.py
@@ -94,6 +94,14 @@ def get_pinned(conn: Connection, owner_id: str) -> list[dict]:
 _SIGNIFICANCE_WEIGHT = 0.3
 _RECENCY_WEIGHT = 0.5
 
+# T57 (Phase 3, §11.1): significance bias applied to the SQL ORDER BY in
+# ``search_memories`` so that the FTS over-fetch already prefers
+# higher-significance rows for tied / near-tied BM25 ranks. Module-level so it
+# can be tuned in one place. BM25 ``rank`` is lower-is-better, so the bias is
+# *subtracted* from rank in the ASC ordering. NOTE: this is an additive offset
+# on rank, not a literal multiplier of the match score.
+SIGNIFICANCE_RANK_BIAS = 0.5
+
 
 def search_memories(
     conn: Connection,
@@ -137,10 +145,15 @@ def search_memories(
         "JOIN memories m ON m.id = memories_fts.rowid "
         f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
         "AND memories_fts MATCH ? "
-        "ORDER BY memories_fts.rank "
+        # T57: significance bias on the FTS over-fetch order. BM25 ``rank`` is
+        # lower-is-better, so subtracting ``significance * BIAS`` surfaces
+        # higher-significance rows above rows with equal/near-equal match
+        # strength. COALESCE keeps a NULL significance from turning the whole
+        # sort key NULL, which SQLite would sort first under ASC.
+        "ORDER BY (memories_fts.rank - COALESCE(m.significance, 0) * ?) ASC "
         "LIMIT ?"
     )
-    cur = conn.execute(sql, (owner_id, query, over_fetch))
+    cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
     rows = cur.fetchall()
     if not rows:
         return []
diff --git a/tests/test_memory_search.py b/tests/test_memory_search.py
index dad7e84..76f0ee1 100644
--- a/tests/test_memory_search.py
+++ b/tests/test_memory_search.py
@@ -125,3 +125,37 @@ def test_search_invalid_witness_role_raises(tmp_path):
     with open_db(db) as conn:
         with pytest.raises(ValueError):
             search_memories(conn, "bot_a", "invalid_role", "anything", k=4)
+
+
+def test_higher_significance_outranks_equal_rank(tmp_path):
+    """T57: significance multiplier biases the SQL ORDER BY.
+
+    Two memories with IDENTICAL FTS-matching text yield (effectively) equal
+    BM25 ranks. The significance bias applied in the SQL ORDER BY must
+    surface the higher-significance row first.
+    """
+    db = tmp_path / "t.db"
+    _seed(
+        db,
+        memory_specs=[
+            # Identical pov_summary text -> FTS BM25 rank is the same for both.
+            {"pov_summary": "she swore an oath", "significance": 0},
+            {"pov_summary": "she swore an oath", "significance": 3},
+        ],
+    )
+    with open_db(db) as conn:
+        out = search_memories(conn, "bot_a", "host", "oath", k=5)
+    assert len(out) == 2
+    # Higher significance wins despite tied FTS rank.
+    assert out[0]["significance"] == 3
+    assert out[1]["significance"] == 0
+
+
+def test_significance_bias_is_constant_module_level():
+    """T57: pin ``SIGNIFICANCE_RANK_BIAS`` as a tunable module-level numeric."""
+    from chat.state.memory import SIGNIFICANCE_RANK_BIAS
+
+    assert isinstance(SIGNIFICANCE_RANK_BIAS, (int, float))
+    # Must be non-negative -- a negative bias would invert the desired
+    # "higher significance ranks higher" semantics.
+    assert SIGNIFICANCE_RANK_BIAS >= 0