feat: branching read-side filter — event readers consult active branch range (T113)
Wire the active branch's [origin_event_id, head_event_id] window into every user-facing event/memory reader so switching branches actually changes what dialogue and memories the user sees. Phase 4 T89/T94 shipped branches as metadata-only — this closes the loop.

Helper:
- chat/state/branches.py: add `active_branch_event_ids(conn)` returning the active branch's id range, with two defensive fall-throughs to `(0, BIG_INT)` (where `BIG_INT` is the module constant `_NO_HEAD_CLAMP`, `2**63 - 1`): (a) no active branch row at all, and (b) the bootstrap "main" sentinel (name="main", origin=0, head=0). Production never bumps main's head_event_id today, so this preserves existing reader behaviour for every test that doesn't explicitly switch branches.

Readers updated (all user-facing dialogue / retrieval surfaces):
- chat/services/turn_common.py::read_recent_dialogue — chat-history prompt context + the chat-view template path (via web/turns.py + web/chat.py).
- chat/services/scene_summarize.py::_read_recent_dialogue — scene-close per-POV summary input.
- chat/state/memory.py::search_memories — FTS leg filters via m.event_id (T109's column); legacy NULL event_id rows are *included* unconditionally so the filter doesn't break pre-0014 retrieval. The fused (FTS + RRF + vector) path also drops vector hits whose event_id falls outside the branch window.
- chat/web/meanwhile.py::_read_recent_meanwhile_dialogue — meanwhile prompt context.

Projector queries (chat/state/world.py et al.) and admin/management surfaces (drawer hide-panel, cross-chat search, regenerate's row lookups by id) are intentionally NOT branch-filtered: projection must see the full log to build state correctly, and the admin surfaces operate across branches by design.

Tests (10 new, 446 total):
- tests/test_branches_state.py: 3 tests for `active_branch_event_ids` itself (bootstrap-main, no-active-branch, non-main literal range).
- tests/test_branching.py: 7 cross-feature tests covering the spec's five required scenarios plus scene_summarize and meanwhile readers.
This commit is contained in:
@@ -144,23 +144,36 @@ def _read_recent_dialogue(
|
||||
``id >= since_event_id`` so callers needing a scene-scoped view (e.g.
|
||||
thread detection on close) don't pull turns that landed before the
|
||||
closing scene's ``scene_opened`` event.
|
||||
|
||||
T113: also clamps by the active branch's ``[origin, head]`` event-id
|
||||
range so scene-summary inputs respect the user's current branch.
|
||||
Bootstrap-main and "no active branch" fall through to ``(0, BIG_INT)``
|
||||
so existing flows are unchanged.
|
||||
"""
|
||||
from chat.state.branches import active_branch_event_ids
|
||||
|
||||
origin, head = active_branch_event_ids(conn)
|
||||
if since_event_id is None:
|
||||
cur = conn.execute(
|
||||
"SELECT kind, payload_json FROM event_log "
|
||||
"WHERE kind IN ('user_turn', 'assistant_turn') "
|
||||
" AND superseded_by IS NULL AND hidden = 0 "
|
||||
" AND id BETWEEN ? AND ? "
|
||||
"ORDER BY id DESC LIMIT ?",
|
||||
(limit,),
|
||||
(origin, head, limit),
|
||||
)
|
||||
else:
|
||||
# Compose ``since_event_id`` with the branch lower bound — readers
|
||||
# want the tightest ``id >= max(since, origin)`` clamp without an
|
||||
# extra Python pass.
|
||||
lower = max(origin, since_event_id)
|
||||
cur = conn.execute(
|
||||
"SELECT kind, payload_json FROM event_log "
|
||||
"WHERE kind IN ('user_turn', 'assistant_turn') "
|
||||
" AND superseded_by IS NULL AND hidden = 0 "
|
||||
" AND id >= ? "
|
||||
" AND id BETWEEN ? AND ? "
|
||||
"ORDER BY id DESC LIMIT ?",
|
||||
(since_event_id, limit),
|
||||
(lower, head, limit),
|
||||
)
|
||||
rows = list(reversed(cur.fetchall()))
|
||||
out: list[dict] = []
|
||||
|
||||
@@ -30,6 +30,7 @@ from __future__ import annotations
|
||||
import json
|
||||
from sqlite3 import Connection
|
||||
|
||||
from chat.state.branches import active_branch_event_ids
|
||||
from chat.state.edges import get_edge
|
||||
|
||||
|
||||
@@ -60,15 +61,22 @@ def read_recent_dialogue(
|
||||
previous implementation filtered chat_id post-fetch in Python, which
|
||||
let foreign-chat rows fill the LIMIT and yield fewer than N relevant
|
||||
rows in busy multi-chat databases.
|
||||
|
||||
T113: clamp by the active branch's ``[origin, head]`` event-id range so
|
||||
switching branches actually changes what dialogue this read sees.
|
||||
Bootstrap-main and "no active branch" both fall through to ``(0,
|
||||
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
|
||||
"""
|
||||
origin, head = active_branch_event_ids(conn)
|
||||
if exclude_event_id is None:
|
||||
cur = conn.execute(
|
||||
"SELECT id, kind, payload_json FROM event_log "
|
||||
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
|
||||
" AND superseded_by IS NULL AND hidden = 0 "
|
||||
" AND id BETWEEN ? AND ? "
|
||||
" AND json_extract(payload_json, '$.chat_id') = ? "
|
||||
"ORDER BY id DESC LIMIT ?",
|
||||
(chat_id, limit),
|
||||
(origin, head, chat_id, limit),
|
||||
)
|
||||
else:
|
||||
cur = conn.execute(
|
||||
@@ -76,9 +84,10 @@ def read_recent_dialogue(
|
||||
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
|
||||
" AND id != ? "
|
||||
" AND superseded_by IS NULL AND hidden = 0 "
|
||||
" AND id BETWEEN ? AND ? "
|
||||
" AND json_extract(payload_json, '$.chat_id') = ? "
|
||||
"ORDER BY id DESC LIMIT ?",
|
||||
(exclude_event_id, chat_id, limit),
|
||||
(exclude_event_id, origin, head, chat_id, limit),
|
||||
)
|
||||
rows = list(reversed(cur.fetchall()))
|
||||
out: list[dict] = []
|
||||
|
||||
@@ -157,8 +157,58 @@ def active_branch(conn: Connection) -> dict | None:
|
||||
}
|
||||
|
||||
|
||||
# T113: "unbounded head" sentinel handed back by ``active_branch_event_ids``
# whenever the active branch's head is unset (the bootstrap "main" row with
# origin=0 and head=0). Readers always build ``id BETWEEN origin AND head``;
# an upper bound above every possible row id turns that clamp into a no-op,
# so no second query shape is needed. The value is SQLite's largest signed
# 64-bit integer.
_NO_HEAD_CLAMP = (1 << 63) - 1


def active_branch_event_ids(conn: Connection) -> tuple[int, int]:
    """Return the active branch's ``(origin_event_id, head_event_id)`` pair.

    The pair is intended to be bound into an
    ``event_log.id BETWEEN ? AND ?`` filter on user-facing reads (T113).

    Three outcomes are possible:

    * **No active branch row** — ``active_branch(conn)`` returned ``None``.
      The result is ``(0, _NO_HEAD_CLAMP)`` so readers see every event,
      keeping the Phase 4 "branches are metadata-only" contract intact for
      any code path that runs without the migration-0013 bootstrap.
    * **Bootstrap "main"** — the row has ``name == "main"`` with both ids
      equal to 0 (the shape migration 0013 inserts). Production does not
      currently emit ``branch_head_updated`` for main, so its head stays at
      0 while events accumulate; this exact state is therefore read as
      "no clamp" and also yields ``(0, _NO_HEAD_CLAMP)``. Tests that never
      configure branches depend on this.
    * **Any other branch** — the row's literal ``(origin, head)`` is
      returned. A branch created at origin=N starts with head=N (per
      ``branch_from_event``), so ``BETWEEN N AND N`` matches only that seed
      event until ``branch_head_updated`` advances the head.

    Schema note: the T113 spec describes ``head_event_id`` as nullable, but
    migration 0013 declares it ``NOT NULL DEFAULT 0``. Head=0 on bootstrap
    main is consequently treated as the "unset" marker; non-main branches
    do not reach head=0 in normal flow because creation sets head=origin
    and origin=0 is reserved for main.
    """
    unclamped = (0, _NO_HEAD_CLAMP)

    row = active_branch(conn)
    if row is None:
        return unclamped

    # Missing or falsy ids collapse to 0 before the int() coercion.
    lower = int(row.get("origin_event_id") or 0)
    upper = int(row.get("head_event_id") or 0)

    # Require the name as well as both zero ids so that a hypothetical
    # future non-main branch legitimately rooted at origin=0 is not
    # mistaken for the bootstrap sentinel.
    is_bootstrap_main = row.get("name") == "main" and lower == 0 and upper == 0
    return unclamped if is_bootstrap_main else (lower, upper)
|
||||
|
||||
|
||||
# Public read-side API of this module; ``active_branch_event_ids`` was added
# for the T113 branch-range clamp.
__all__ = [
    "get_branch",
    "list_branches",
    "active_branch",
    "active_branch_event_ids",
]
|
||||
|
||||
+34
-1
@@ -213,12 +213,20 @@ def search_memories(
|
||||
# channel) so memories that are weak in FTS but strong in vector — and
|
||||
# vice versa — make it into the merge pool.
|
||||
over_fetch = max(k * 2, 20) if query_vector is not None else max(k * 4, 20)
|
||||
# T113: branch-scope filter on ``m.event_id`` (T109's column). Memories
|
||||
# whose ``event_id`` is NULL — projected before the 0014 schema migration
|
||||
# ran — are *included* unconditionally so the branch filter never breaks
|
||||
# legacy retrieval. Newer rows respect the active branch's bounds.
|
||||
from chat.state.branches import active_branch_event_ids
|
||||
|
||||
origin, head = active_branch_event_ids(conn)
|
||||
sql = (
|
||||
f"SELECT {select_list}, memories_fts.rank AS fts_rank "
|
||||
"FROM memories_fts "
|
||||
"JOIN memories m ON m.id = memories_fts.rowid "
|
||||
f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
|
||||
"AND memories_fts MATCH ? "
|
||||
"AND (m.event_id IS NULL OR m.event_id BETWEEN ? AND ?) "
|
||||
# T57: significance multiplier biases the FTS over-fetch order. BM25
|
||||
# ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
|
||||
# surfaces higher-significance rows above lower-significance rows with
|
||||
@@ -227,7 +235,10 @@ def search_memories(
|
||||
"ORDER BY (memories_fts.rank - m.significance * ?) ASC "
|
||||
"LIMIT ?"
|
||||
)
|
||||
cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
|
||||
cur = conn.execute(
|
||||
sql,
|
||||
(owner_id, query, origin, head, SIGNIFICANCE_RANK_BIAS, over_fetch),
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
|
||||
# FTS-only path: preserve pre-T96 behaviour exactly.
|
||||
@@ -331,6 +342,28 @@ def _rrf_fuse_and_rerank(
|
||||
query_vector=query_vector,
|
||||
k=vec_over_fetch,
|
||||
)
|
||||
# T113: drop vector hits that fall outside the active branch's event-id
|
||||
# range. ``vector_search`` is a generic service used elsewhere; the
|
||||
# branch filter applied to the FTS leg also has to apply here so the
|
||||
# fused result respects the same scope. Memories with NULL event_id
|
||||
# (legacy rows projected before T109's 0014 schema migration) are
|
||||
# included unconditionally — same policy as the FTS leg.
|
||||
from chat.state.branches import _NO_HEAD_CLAMP, active_branch_event_ids
|
||||
|
||||
vec_origin, vec_head = active_branch_event_ids(conn)
|
||||
if vec_hits and (vec_origin > 0 or vec_head < _NO_HEAD_CLAMP):
|
||||
vec_ids = [h["memory_id"] for h in vec_hits]
|
||||
placeholders_v = ",".join("?" * len(vec_ids))
|
||||
in_range = {
|
||||
row[0]
|
||||
for row in conn.execute(
|
||||
f"SELECT id FROM memories "
|
||||
f"WHERE id IN ({placeholders_v}) "
|
||||
f" AND (event_id IS NULL OR event_id BETWEEN ? AND ?)",
|
||||
(*vec_ids, vec_origin, vec_head),
|
||||
).fetchall()
|
||||
}
|
||||
vec_hits = [h for h in vec_hits if h["memory_id"] in in_range]
|
||||
vec_rank_by_id: dict[int, int] = {
|
||||
hit["memory_id"]: rank for rank, hit in enumerate(vec_hits)
|
||||
}
|
||||
|
||||
+10
-1
@@ -71,18 +71,27 @@ def _read_recent_meanwhile_dialogue(
|
||||
that already match — avoids an unbounded scan as ``event_log``
|
||||
grows. The user-side rows match on chat_id only since they aren't
|
||||
tagged with a scene id (they ride the chat-wide log).
|
||||
|
||||
T113: clamp by the active branch's ``[origin, head]`` event-id range
|
||||
so meanwhile prompt context respects the user's current branch.
|
||||
Bootstrap-main and "no active branch" both fall through to ``(0,
|
||||
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
|
||||
"""
|
||||
from chat.state.branches import active_branch_event_ids
|
||||
|
||||
origin, head = active_branch_event_ids(conn)
|
||||
cur = conn.execute(
|
||||
"SELECT id, kind, payload_json FROM event_log "
|
||||
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
|
||||
" AND superseded_by IS NULL AND hidden = 0 "
|
||||
" AND id BETWEEN ? AND ? "
|
||||
" AND json_extract(payload_json, '$.chat_id') = ? "
|
||||
" AND ("
|
||||
" kind IN ('user_turn', 'user_turn_edit') "
|
||||
" OR json_extract(payload_json, '$.meanwhile_scene_id') = ?"
|
||||
" ) "
|
||||
"ORDER BY id DESC LIMIT ?",
|
||||
(chat_id, scene_id, limit),
|
||||
(origin, head, chat_id, scene_id, limit),
|
||||
)
|
||||
rows = cur.fetchall()
|
||||
rows.reverse()
|
||||
|
||||
Reference in New Issue
Block a user