feat: branching read-side filter — event readers consult active branch range (T113)

Wire the active branch's [origin_event_id, head_event_id] window into
every user-facing event/memory reader so switching branches actually
changes what dialogue and memories the user sees. Phase 4 T89/T94
shipped branches as metadata-only — this closes the loop.

Helper:
- chat/state/branches.py: add `active_branch_event_ids(conn)` returning
  the active branch's `(origin_event_id, head_event_id)` range, with two
  defensive fall-throughs to `(0, _NO_HEAD_CLAMP)` (`2**63 - 1`, i.e. no
  upper clamp): (a) no active branch row at all, and (b) the bootstrap
  "main" sentinel (name="main", origin=0, head=0). Production never bumps
  main's head_event_id today, so this preserves existing reader behaviour
  for every test that doesn't explicitly switch branches.

Readers updated (all user-facing dialogue / retrieval surfaces):
- chat/services/turn_common.py::read_recent_dialogue — chat-history
  prompt context + the chat-view template path (via web/turns.py +
  web/chat.py).
- chat/services/scene_summarize.py::_read_recent_dialogue — scene-close
  per-POV summary input.
- chat/state/memory.py::search_memories — FTS leg filters via
  m.event_id (T109's column); legacy NULL event_id rows are *included*
  unconditionally so the filter doesn't break pre-0014 retrieval. The
  fused (FTS + RRF + vector) path also drops vector hits whose
  event_id falls outside the branch window.
- chat/web/meanwhile.py::_read_recent_meanwhile_dialogue — meanwhile
  prompt context.

Projector queries (chat/state/world.py et al.) and admin/management
surfaces (drawer hide-panel, cross-chat search, regenerate's row
lookups by id) are intentionally NOT branch-filtered: projection must
see the full log to build state correctly, and the admin surfaces
operate across branches by design.

Tests (10 new, 446 total):
- tests/test_branches_state.py: 3 tests for `active_branch_event_ids`
  itself (bootstrap-main, no-active-branch, non-main literal range).
- tests/test_branching.py: 7 cross-feature tests covering the spec's
  five required scenarios plus scene_summarize and meanwhile readers.
This commit is contained in:
Joseph Doherty
2026-04-27 06:25:22 -04:00
parent 757abf24f8
commit 456f50d334
7 changed files with 484 additions and 8 deletions
+16 -3
View File
@@ -144,23 +144,36 @@ def _read_recent_dialogue(
``id >= since_event_id`` so callers needing a scene-scoped view (e.g.
thread detection on close) don't pull turns that landed before the
closing scene's ``scene_opened`` event.
T113: also clamps by the active branch's ``[origin, head]`` event-id
range so scene-summary inputs respect the user's current branch.
Bootstrap-main and "no active branch" fall through to ``(0, BIG_INT)``
so existing flows are unchanged.
"""
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
if since_event_id is None:
cur = conn.execute(
"SELECT kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
"ORDER BY id DESC LIMIT ?",
(limit,),
(origin, head, limit),
)
else:
# Compose ``since_event_id`` with the branch lower bound — readers
# want the tightest ``id >= max(since, origin)`` clamp without an
# extra Python pass.
lower = max(origin, since_event_id)
cur = conn.execute(
"SELECT kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id >= ? "
" AND id BETWEEN ? AND ? "
"ORDER BY id DESC LIMIT ?",
(since_event_id, limit),
(lower, head, limit),
)
rows = list(reversed(cur.fetchall()))
out: list[dict] = []
+11 -2
View File
@@ -30,6 +30,7 @@ from __future__ import annotations
import json
from sqlite3 import Connection
from chat.state.branches import active_branch_event_ids
from chat.state.edges import get_edge
@@ -60,15 +61,22 @@ def read_recent_dialogue(
previous implementation filtered chat_id post-fetch in Python, which
let foreign-chat rows fill the LIMIT and yield fewer than N relevant
rows in busy multi-chat databases.
T113: clamp by the active branch's ``[origin, head]`` event-id range so
switching branches actually changes what dialogue this read sees.
Bootstrap-main and "no active branch" both fall through to ``(0,
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
"""
origin, head = active_branch_event_ids(conn)
if exclude_event_id is None:
cur = conn.execute(
"SELECT id, kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
"ORDER BY id DESC LIMIT ?",
(chat_id, limit),
(origin, head, chat_id, limit),
)
else:
cur = conn.execute(
@@ -76,9 +84,10 @@ def read_recent_dialogue(
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND id != ? "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
"ORDER BY id DESC LIMIT ?",
(exclude_event_id, chat_id, limit),
(exclude_event_id, origin, head, chat_id, limit),
)
rows = list(reversed(cur.fetchall()))
out: list[dict] = []
+50
View File
@@ -157,8 +157,58 @@ def active_branch(conn: Connection) -> dict | None:
}
# T113: sentinel "no upper bound" returned as the head by
# ``active_branch_event_ids`` in its two fall-through cases: when there is no
# active branch row at all, and when the active branch is the bootstrap
# "main" row (name="main", origin=0, head=0) whose head is effectively unset.
# Readers compose ``id BETWEEN origin AND head``, so a value larger than any
# possible row id behaves as "no clamp" without needing a separate code path.
# ``2**63 - 1`` is the largest signed 64-bit integer, which is SQLite's
# maximum INTEGER value.
_NO_HEAD_CLAMP = 2**63 - 1
def active_branch_event_ids(conn: Connection) -> tuple[int, int]:
    """Return the active branch's ``(origin_event_id, head_event_id)`` pair.

    The pair is meant to be bound into an ``event_log.id BETWEEN ? AND ?``
    clamp on user-facing reads (T113).

    Three outcomes:

    * ``active_branch(conn)`` returns ``None`` (no active branch row):
      ``(0, _NO_HEAD_CLAMP)`` is returned, so readers see every event.
    * The active row is the bootstrap "main" branch, recognised by
      ``name == "main"`` with both origin and head equal to 0:
      ``(0, _NO_HEAD_CLAMP)`` is returned as well. Per the T113 notes,
      main's head is never bumped in production today, so a literal
      ``BETWEEN 0 AND 0`` would hide every event.
    * Any other row: the literal ``(origin, head)`` values are returned.
      A missing or falsy id is coerced to 0 before the comparison.

    Schema note carried over from T113: the spec describes
    ``head_event_id`` as nullable while the migration reportedly declares
    it ``NOT NULL DEFAULT 0``; head=0 on bootstrap main is therefore read
    as the "unset" marker.
    """
    unclamped = (0, _NO_HEAD_CLAMP)

    row = active_branch(conn)
    if row is None:
        return unclamped

    lower = int(row.get("origin_event_id") or 0)
    upper = int(row.get("head_event_id") or 0)

    # Require the name *and* both ids at 0 so that a differently named
    # branch that legitimately starts at origin=0 is not mistaken for the
    # bootstrap row.
    is_bootstrap_main = row.get("name") == "main" and lower == 0 and upper == 0
    if is_bootstrap_main:
        return unclamped
    return (lower, upper)
# Names exported by ``from chat.state.branches import *``.
# ``active_branch_event_ids`` is the T113 read-side range helper.
__all__ = [
"get_branch",
"list_branches",
"active_branch",
"active_branch_event_ids",
]
+34 -1
View File
@@ -213,12 +213,20 @@ def search_memories(
# channel) so memories that are weak in FTS but strong in vector — and
# vice versa — make it into the merge pool.
over_fetch = max(k * 2, 20) if query_vector is not None else max(k * 4, 20)
# T113: branch-scope filter on ``m.event_id`` (T109's column). Memories
# whose ``event_id`` is NULL — projected before the 0014 schema migration
# ran — are *included* unconditionally so the branch filter never breaks
# legacy retrieval. Newer rows respect the active branch's bounds.
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
sql = (
f"SELECT {select_list}, memories_fts.rank AS fts_rank "
"FROM memories_fts "
"JOIN memories m ON m.id = memories_fts.rowid "
f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
"AND memories_fts MATCH ? "
"AND (m.event_id IS NULL OR m.event_id BETWEEN ? AND ?) "
# T57: significance multiplier biases the FTS over-fetch order. BM25
# ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
# surfaces higher-significance rows above lower-significance rows with
@@ -227,7 +235,10 @@ def search_memories(
"ORDER BY (memories_fts.rank - m.significance * ?) ASC "
"LIMIT ?"
)
cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
cur = conn.execute(
sql,
(owner_id, query, origin, head, SIGNIFICANCE_RANK_BIAS, over_fetch),
)
rows = cur.fetchall()
# FTS-only path: preserve pre-T96 behaviour exactly.
@@ -331,6 +342,28 @@ def _rrf_fuse_and_rerank(
query_vector=query_vector,
k=vec_over_fetch,
)
# T113: drop vector hits that fall outside the active branch's event-id
# range. ``vector_search`` is a generic service used elsewhere; the
# branch filter applied to the FTS leg also has to apply here so the
# fused result respects the same scope. Memories with NULL event_id
# (legacy rows projected before T109's 0014 schema migration) are
# included unconditionally — same policy as the FTS leg.
from chat.state.branches import _NO_HEAD_CLAMP, active_branch_event_ids
vec_origin, vec_head = active_branch_event_ids(conn)
if vec_hits and (vec_origin > 0 or vec_head < _NO_HEAD_CLAMP):
vec_ids = [h["memory_id"] for h in vec_hits]
placeholders_v = ",".join("?" * len(vec_ids))
in_range = {
row[0]
for row in conn.execute(
f"SELECT id FROM memories "
f"WHERE id IN ({placeholders_v}) "
f" AND (event_id IS NULL OR event_id BETWEEN ? AND ?)",
(*vec_ids, vec_origin, vec_head),
).fetchall()
}
vec_hits = [h for h in vec_hits if h["memory_id"] in in_range]
vec_rank_by_id: dict[int, int] = {
hit["memory_id"]: rank for rank, hit in enumerate(vec_hits)
}
+10 -1
View File
@@ -71,18 +71,27 @@ def _read_recent_meanwhile_dialogue(
that already match — avoids an unbounded scan as ``event_log``
grows. The user-side rows match on chat_id only since they aren't
tagged with a scene id (they ride the chat-wide log).
T113: clamp by the active branch's ``[origin, head]`` event-id range
so meanwhile prompt context respects the user's current branch.
Bootstrap-main and "no active branch" both fall through to ``(0,
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
"""
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
cur = conn.execute(
"SELECT id, kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
" AND ("
" kind IN ('user_turn', 'user_turn_edit') "
" OR json_extract(payload_json, '$.meanwhile_scene_id') = ?"
" ) "
"ORDER BY id DESC LIMIT ?",
(chat_id, scene_id, limit),
(origin, head, chat_id, scene_id, limit),
)
rows = cur.fetchall()
rows.reverse()