feat: branching read-side filter — event readers consult active branch range (T113)

Wire the active branch's [origin_event_id, head_event_id] window into
every user-facing event/memory reader so switching branches actually
changes what dialogue and memories the user sees. Phase 4 T89/T94
shipped branches as metadata-only — this closes the loop.

Helper:
- chat/state/branches.py: add `active_branch_event_ids(conn)` returning
  the active branch's id range, with two defensive fall-throughs to
  `(0, BIG_INT)` — where BIG_INT is the module constant `_NO_HEAD_CLAMP`
  (2**63 - 1): (a) no active branch row at all, and (b) the
  bootstrap "main" sentinel (name="main", origin=0, head=0). Production
  never bumps main's head_event_id today, so this preserves existing
  reader behaviour for every test that doesn't explicitly switch.

Readers updated (all user-facing dialogue / retrieval surfaces):
- chat/services/turn_common.py::read_recent_dialogue — chat-history
  prompt context + the chat-view template path (via web/turns.py +
  web/chat.py).
- chat/services/scene_summarize.py::_read_recent_dialogue — scene-close
  per-POV summary input.
- chat/state/memory.py::search_memories — FTS leg filters via
  m.event_id (T109's column); legacy NULL event_id rows are *included*
  unconditionally so the filter doesn't break pre-0014 retrieval. The
  fused (FTS + RRF + vector) path also drops vector hits whose
  event_id falls outside the branch window.
- chat/web/meanwhile.py::_read_recent_meanwhile_dialogue — meanwhile
  prompt context.

Projector queries (chat/state/world.py et al.) and admin/management
surfaces (drawer hide-panel, cross-chat search, regenerate's row
lookups by id) are intentionally NOT branch-filtered: projection must
see the full log to build state correctly, and the admin surfaces
operate across branches by design.

Tests (10 new, 446 total):
- tests/test_branches_state.py: 3 tests for `active_branch_event_ids`
  itself (bootstrap-main, no-active-branch, non-main literal range).
- tests/test_branching.py: 7 cross-feature tests covering the spec's
  five required scenarios plus scene_summarize and meanwhile readers.
This commit is contained in:
Joseph Doherty
2026-04-27 06:25:22 -04:00
parent 757abf24f8
commit 456f50d334
7 changed files with 484 additions and 8 deletions
+16 -3
View File
@@ -144,23 +144,36 @@ def _read_recent_dialogue(
``id >= since_event_id`` so callers needing a scene-scoped view (e.g.
thread detection on close) don't pull turns that landed before the
closing scene's ``scene_opened`` event.
T113: also clamps by the active branch's ``[origin, head]`` event-id
range so scene-summary inputs respect the user's current branch.
Bootstrap-main and "no active branch" fall through to ``(0, BIG_INT)``
so existing flows are unchanged.
"""
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
if since_event_id is None:
cur = conn.execute(
"SELECT kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
"ORDER BY id DESC LIMIT ?",
(limit,),
(origin, head, limit),
)
else:
# Compose ``since_event_id`` with the branch lower bound — readers
# want the tightest ``id >= max(since, origin)`` clamp without an
# extra Python pass.
lower = max(origin, since_event_id)
cur = conn.execute(
"SELECT kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id >= ? "
" AND id BETWEEN ? AND ? "
"ORDER BY id DESC LIMIT ?",
(since_event_id, limit),
(lower, head, limit),
)
rows = list(reversed(cur.fetchall()))
out: list[dict] = []
+11 -2
View File
@@ -30,6 +30,7 @@ from __future__ import annotations
import json
from sqlite3 import Connection
from chat.state.branches import active_branch_event_ids
from chat.state.edges import get_edge
@@ -60,15 +61,22 @@ def read_recent_dialogue(
previous implementation filtered chat_id post-fetch in Python, which
let foreign-chat rows fill the LIMIT and yield fewer than N relevant
rows in busy multi-chat databases.
T113: clamp by the active branch's ``[origin, head]`` event-id range so
switching branches actually changes what dialogue this read sees.
Bootstrap-main and "no active branch" both fall through to ``(0,
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
"""
origin, head = active_branch_event_ids(conn)
if exclude_event_id is None:
cur = conn.execute(
"SELECT id, kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
"ORDER BY id DESC LIMIT ?",
(chat_id, limit),
(origin, head, chat_id, limit),
)
else:
cur = conn.execute(
@@ -76,9 +84,10 @@ def read_recent_dialogue(
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND id != ? "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
"ORDER BY id DESC LIMIT ?",
(exclude_event_id, chat_id, limit),
(exclude_event_id, origin, head, chat_id, limit),
)
rows = list(reversed(cur.fetchall()))
out: list[dict] = []
+50
View File
@@ -157,8 +157,58 @@ def active_branch(conn: Connection) -> dict | None:
}
# T113: sentinel "no upper bound" that ``active_branch_event_ids`` returns as
# the head whenever no clamp should apply: either there is no active branch
# row at all, or the active branch is the bootstrap "main" branch whose head
# is unset (origin=0 + head=0). Readers compose ``id BETWEEN origin AND head``
# (inclusive), so a value at least as large as any possible row id behaves as
# "no clamp" without needing a separate code path. ``2**63 - 1`` is SQLite's
# max signed-int — safe forever.
_NO_HEAD_CLAMP = 2**63 - 1
def active_branch_event_ids(conn: Connection) -> tuple[int, int]:
    """Resolve the active branch into ``event_log.id`` bounds (T113).

    The returned ``(origin_event_id, head_event_id)`` pair is meant to be
    bound straight into an ``id BETWEEN ? AND ?`` clause by user-facing
    readers.

    Three outcomes are possible:

    * ``active_branch`` yields ``None`` (no active branch row) — the
      unbounded window ``(0, _NO_HEAD_CLAMP)`` is returned so readers keep
      seeing every event. This keeps the Phase 4 "branches are
      metadata-only" contract intact for code paths that run without the
      migration-0013 bootstrap.
    * The active row is the bootstrap "main" branch, i.e. exactly
      ``name="main"`` with origin 0 and head 0 as seeded by migration 0013.
      Production does not emit ``branch_head_updated`` for main today, so
      its head stays 0 while events accumulate; this state is therefore
      read as "no clamp" and also maps to ``(0, _NO_HEAD_CLAMP)``. Tests
      that never configure branches rely on this.
    * Anything else — the row's literal ``(origin, head)`` is returned. A
      branch freshly created at origin N starts with head N (per
      ``branch_from_event``), so the window covers only that seed event
      until ``branch_head_updated`` moves the head.

    Schema note: the T113 spec calls ``head_event_id`` nullable, while
    migration 0013 declares it ``NOT NULL DEFAULT 0``. Head 0 on bootstrap
    main is consequently the "unset" marker; non-main branches are not
    expected to sit at head 0 in normal flow (creation sets head=origin,
    and origin 0 belongs to main only).
    """
    unbounded = (0, _NO_HEAD_CLAMP)

    row = active_branch(conn)
    if row is None:
        return unbounded

    # Missing / NULL ids collapse to 0 before the sentinel check below.
    lower = int(row.get("origin_event_id") or 0)
    upper = int(row.get("head_event_id") or 0)

    # The sentinel requires the name AND both ids at 0, so a hypothetical
    # future branch that legitimately starts at origin 0 is not mistaken
    # for bootstrap main.
    is_bootstrap_main = (
        row.get("name") == "main" and (lower, upper) == (0, 0)
    )
    return unbounded if is_bootstrap_main else (lower, upper)
# Public names exported by ``import *`` from this module. The underscore-
# prefixed ``_NO_HEAD_CLAMP`` sentinel is deliberately not listed here.
__all__ = [
"get_branch",
"list_branches",
"active_branch",
"active_branch_event_ids",
]
+34 -1
View File
@@ -213,12 +213,20 @@ def search_memories(
# channel) so memories that are weak in FTS but strong in vector — and
# vice versa — make it into the merge pool.
over_fetch = max(k * 2, 20) if query_vector is not None else max(k * 4, 20)
# T113: branch-scope filter on ``m.event_id`` (T109's column). Memories
# whose ``event_id`` is NULL — projected before the 0014 schema migration
# ran — are *included* unconditionally so the branch filter never breaks
# legacy retrieval. Newer rows respect the active branch's bounds.
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
sql = (
f"SELECT {select_list}, memories_fts.rank AS fts_rank "
"FROM memories_fts "
"JOIN memories m ON m.id = memories_fts.rowid "
f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
"AND memories_fts MATCH ? "
"AND (m.event_id IS NULL OR m.event_id BETWEEN ? AND ?) "
# T57: significance multiplier biases the FTS over-fetch order. BM25
# ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
# surfaces higher-significance rows above lower-significance rows with
@@ -227,7 +235,10 @@ def search_memories(
"ORDER BY (memories_fts.rank - m.significance * ?) ASC "
"LIMIT ?"
)
cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
cur = conn.execute(
sql,
(owner_id, query, origin, head, SIGNIFICANCE_RANK_BIAS, over_fetch),
)
rows = cur.fetchall()
# FTS-only path: preserve pre-T96 behaviour exactly.
@@ -331,6 +342,28 @@ def _rrf_fuse_and_rerank(
query_vector=query_vector,
k=vec_over_fetch,
)
# T113: drop vector hits that fall outside the active branch's event-id
# range. ``vector_search`` is a generic service used elsewhere; the
# branch filter applied to the FTS leg also has to apply here so the
# fused result respects the same scope. Memories with NULL event_id
# (legacy rows projected before T109's 0014 schema migration) are
# included unconditionally — same policy as the FTS leg.
from chat.state.branches import _NO_HEAD_CLAMP, active_branch_event_ids
vec_origin, vec_head = active_branch_event_ids(conn)
if vec_hits and (vec_origin > 0 or vec_head < _NO_HEAD_CLAMP):
vec_ids = [h["memory_id"] for h in vec_hits]
placeholders_v = ",".join("?" * len(vec_ids))
in_range = {
row[0]
for row in conn.execute(
f"SELECT id FROM memories "
f"WHERE id IN ({placeholders_v}) "
f" AND (event_id IS NULL OR event_id BETWEEN ? AND ?)",
(*vec_ids, vec_origin, vec_head),
).fetchall()
}
vec_hits = [h for h in vec_hits if h["memory_id"] in in_range]
vec_rank_by_id: dict[int, int] = {
hit["memory_id"]: rank for rank, hit in enumerate(vec_hits)
}
+10 -1
View File
@@ -71,18 +71,27 @@ def _read_recent_meanwhile_dialogue(
that already match — avoids an unbounded scan as ``event_log``
grows. The user-side rows match on chat_id only since they aren't
tagged with a scene id (they ride the chat-wide log).
T113: clamp by the active branch's ``[origin, head]`` event-id range
so meanwhile prompt context respects the user's current branch.
Bootstrap-main and "no active branch" both fall through to ``(0,
BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
"""
from chat.state.branches import active_branch_event_ids
origin, head = active_branch_event_ids(conn)
cur = conn.execute(
"SELECT id, kind, payload_json FROM event_log "
"WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
" AND superseded_by IS NULL AND hidden = 0 "
" AND id BETWEEN ? AND ? "
" AND json_extract(payload_json, '$.chat_id') = ? "
" AND ("
" kind IN ('user_turn', 'user_turn_edit') "
" OR json_extract(payload_json, '$.meanwhile_scene_id') = ?"
" ) "
"ORDER BY id DESC LIMIT ?",
(chat_id, scene_id, limit),
(origin, head, chat_id, scene_id, limit),
)
rows = cur.fetchall()
rows.reverse()
+87 -1
View File
@@ -7,7 +7,13 @@ from chat.db.migrate import apply_migrations
from chat.eventlog.log import append_event
from chat.eventlog.projector import project
import chat.state.branches # registers handlers
from chat.state.branches import active_branch, get_branch, list_branches
from chat.state.branches import (
_NO_HEAD_CLAMP,
active_branch,
active_branch_event_ids,
get_branch,
list_branches,
)
def test_main_branch_bootstrapped_by_migration(tmp_path):
@@ -174,3 +180,83 @@ def test_branch_switched_unknown_name_warns(tmp_path, caplog):
# The unknown name was not inserted as a side effect.
assert get_branch(conn, "does_not_exist") is None
def test_active_branch_event_ids_bootstrap_main_returns_no_clamp(tmp_path):
    """A freshly migrated DB (bootstrap "main") yields the no-clamp window.

    Migration 0013 seeds main with origin=0 and head=0, and production does
    not emit ``branch_head_updated`` for main today, so the head stays 0 as
    events accumulate. The helper must read that exact state as "all events
    visible" — lower bound 0, upper bound ``_NO_HEAD_CLAMP`` — so readers
    stay branch-agnostic until a non-main branch becomes active.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        lower, upper = active_branch_event_ids(conn)
        assert (lower, upper) == (0, _NO_HEAD_CLAMP)
def test_active_branch_event_ids_no_active_branch_falls_through(tmp_path):
    """With no active branch row the helper returns ``(0, _NO_HEAD_CLAMP)``.

    Switching to an unknown branch name leaves zero rows flagged
    ``is_active=1``, so ``active_branch`` returns ``None``. Readers must
    still receive a usable range (the full log) rather than crashing on the
    inconsistent metadata.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        # The unknown target clears every is_active flag and activates
        # nothing in its place.
        switch_payload = {"name": "does_not_exist"}
        append_event(conn, kind="branch_switched", payload=switch_payload)
        project(conn)
        assert active_branch(conn) is None

        lower, upper = active_branch_event_ids(conn)
        assert (lower, upper) == (0, _NO_HEAD_CLAMP)
def test_active_branch_event_ids_returns_actual_range_for_non_main(tmp_path):
    """An active non-main branch reports its literal ``(origin, head)``.

    The branch is created at origin=10, its head is bumped to 20, and it is
    then made active; the helper must hand back ``(10, 20)`` so a reader's
    ``BETWEEN`` clamp is scoped to exactly that window.
    """
    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    # Appended in this order, then projected in a single pass.
    branch_events = [
        (
            "branch_created",
            {
                "name": "experiment",
                "origin_event_id": 10,
                "head_event_id": 10,
                "chat_id": "c1",
            },
        ),
        ("branch_head_updated", {"name": "experiment", "head_event_id": 20}),
        ("branch_switched", {"name": "experiment"}),
    ]

    with open_db(db_path) as conn:
        for event_kind, event_payload in branch_events:
            append_event(conn, kind=event_kind, payload=event_payload)
        project(conn)

        lower, upper = active_branch_event_ids(conn)
        assert (lower, upper) == (10, 20)
+276
View File
@@ -129,3 +129,279 @@ def test_list_branches_with_metadata_includes_event_count(tmp_path):
assert rows["exp"]["origin_event_id"] == 10
assert rows["exp"]["head_event_id"] == 15
assert rows["exp"]["event_count"] == 6
# ---------------------------------------------------------------------------
# T113 read-side filter — cross-feature tests.
# ---------------------------------------------------------------------------
#
# These exercise the active-branch event-id clamp through every reader
# the spec called out: ``read_recent_dialogue`` (turn_common),
# ``_read_recent_dialogue`` (scene_summarize), and ``search_memories``
# (memory), plus the meanwhile prompt-context reader
# ``_read_recent_meanwhile_dialogue`` (web/meanwhile). They drive the
# readers via real event-log inserts + branch switches so the
# integration is end-to-end.
def _seed_user_turn(conn, chat_id: str, prose: str) -> int:
    """Append and apply one ``user_turn`` event; return what
    ``append_and_apply`` returns (callers use it as the event id)."""
    turn_payload = {"chat_id": chat_id, "prose": prose, "segments": []}
    return append_and_apply(conn, kind="user_turn", payload=turn_payload)
def test_read_recent_dialogue_respects_active_branch_head(tmp_path):
    """T113 spec test 1: the dialogue reader stops at the active head.

    Ten user turns are seeded, then a branch spanning the first through
    the fifth seeded event is created and activated. Only those five turns
    may come back from ``read_recent_dialogue``; the remaining five sit
    past the branch head.
    """
    from chat.services.turn_common import read_recent_dialogue

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        seeded = []
        for n in range(10):
            seeded.append(_seed_user_turn(conn, "c1", f"turn {n}"))

        # Window = [first seeded id, fifth seeded id] → 5 visible events.
        branch_from_event(
            conn, name="halfway", origin_event_id=seeded[0], chat_id="c1"
        )
        head_payload = {"name": "halfway", "head_event_id": seeded[4]}
        append_and_apply(conn, kind="branch_head_updated", payload=head_payload)
        switch_active_branch(conn, name="halfway")

        visible = read_recent_dialogue(conn, "c1")

        # Rows come back oldest-first, so the visible set is turns 0..4.
        assert len(visible) == 5
        expected_texts = [f"turn {n}" for n in range(5)]
        assert [row["text"] for row in visible] == expected_texts
def test_search_memories_respects_active_branch_head(tmp_path):
    """T113 spec test 2: memory search is clamped through
    ``memories.event_id``.

    Two matching memories are written by two separate events. The active
    branch's window ends at the first event, so the second memory — whose
    projecting event lies past the head — must not appear in FTS results.
    """
    from chat.eventlog.log import append_and_apply as _aa
    from chat.state.memory import search_memories

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        # Both memories come from real events; the projector handler stamps
        # memories.event_id with the id of the projecting event.
        summaries = ["alpha keyword present", "alpha keyword present too"]
        ev_a, ev_b = [
            _aa(
                conn,
                kind="memory_written",
                payload={
                    "owner_id": "host_bot",
                    "chat_id": "c1",
                    "scene_id": 1,
                    "pov_summary": summary,
                    "witness_you": 1,
                    "witness_host": 1,
                    "witness_guest": 0,
                },
            )
            for summary in summaries
        ]

        # No head bump: the new branch keeps head == origin == ev_a, which
        # leaves ev_b beyond the head.
        branch_from_event(
            conn, name="early", origin_event_id=ev_a, chat_id="c1"
        )
        switch_active_branch(conn, name="early")

        hits = search_memories(conn, "host_bot", "host", "alpha")
        hit_event_ids = [hit["event_id"] for hit in hits]

        # Only the first memory survives the clamp.
        assert ev_a in hit_event_ids
        assert ev_b not in hit_event_ids
def test_branch_switch_changes_visible_events(tmp_path):
    """T113 spec test 3: a branch switch takes effect on the very next
    read, because ``read_recent_dialogue`` queries afresh on each call."""
    from chat.services.turn_common import read_recent_dialogue

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        seeded = [
            _seed_user_turn(conn, "c1", f"turn {n}") for n in range(6)
        ]

        # "early" spans the first three turns, "late" the last three.
        windows = (
            ("early", seeded[0], seeded[2]),
            ("late", seeded[3], seeded[5]),
        )
        for branch_name, origin_id, head_id in windows:
            branch_from_event(
                conn, name=branch_name, origin_event_id=origin_id, chat_id="c1"
            )
            append_and_apply(
                conn,
                kind="branch_head_updated",
                payload={"name": branch_name, "head_event_id": head_id},
            )

        switch_active_branch(conn, name="early")
        seen_on_early = [
            row["text"] for row in read_recent_dialogue(conn, "c1")
        ]
        assert seen_on_early == ["turn 0", "turn 1", "turn 2"]

        switch_active_branch(conn, name="late")
        seen_on_late = [
            row["text"] for row in read_recent_dialogue(conn, "c1")
        ]
        assert seen_on_late == ["turn 3", "turn 4", "turn 5"]
def test_main_branch_with_head_zero_returns_empty(tmp_path):
    """T113 spec test 4: an active non-main branch at head=0 reads empty.

    NOTE: despite the ``main`` in this test's name, the branch exercised
    here is the non-main ``stub`` branch. The bootstrap-main sentinel fires
    only for ``name=="main", origin=0, head=0``; any other branch parked at
    ``origin=0, head=0`` gets a literal ``BETWEEN 0 AND 0`` clamp, which
    excludes every real event_log row (rowids start at 1).
    """
    from chat.services.turn_common import read_recent_dialogue

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        # Seed one real row (id 1+) so the clamp has something to exclude;
        # without it the reader would trivially return [].
        _seed_user_turn(conn, "c1", "turn 0")

        # Artificial state — production never produces a non-main branch
        # at origin=0/head=0 — but it is the cleanest way to drive the
        # documented edge case.
        stub_payload = {
            "name": "stub",
            "origin_event_id": 0,
            "head_event_id": 0,
            "chat_id": "c1",
        }
        append_and_apply(conn, kind="branch_created", payload=stub_payload)
        switch_active_branch(conn, name="stub")

        visible = read_recent_dialogue(conn, "c1")
        assert visible == []
def test_no_active_branch_falls_through_to_all_events(tmp_path):
    """T113 spec test 5: readers see the whole log when nothing is active.

    A switch to an unknown branch name clears all ``is_active`` flags; the
    reader must then fall back to the ``(0, BIG_INT)`` defensive default
    and return every seeded turn.
    """
    from chat.services.turn_common import read_recent_dialogue
    from chat.state.branches import active_branch as _ab

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        expected_texts = [f"turn {n}" for n in range(3)]
        for prose in expected_texts:
            _seed_user_turn(conn, "c1", prose)

        # Unknown target → zero rows left with is_active=1.
        missing_payload = {"name": "missing"}
        append_and_apply(conn, kind="branch_switched", payload=missing_payload)
        assert _ab(conn) is None

        visible = read_recent_dialogue(conn, "c1")
        assert [row["text"] for row in visible] == expected_texts
def test_scene_summarize_read_recent_dialogue_respects_branch(tmp_path):
    """T113: the scene-close summary input reader,
    ``scene_summarize._read_recent_dialogue``, is clamped to the active
    branch range as well."""
    from chat.services.scene_summarize import _read_recent_dialogue

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:
        seeded = [
            _seed_user_turn(conn, "c1", f"turn {n}") for n in range(6)
        ]

        # Active window covers the first three seeded turns only.
        branch_from_event(
            conn, name="early", origin_event_id=seeded[0], chat_id="c1"
        )
        head_payload = {"name": "early", "head_event_id": seeded[2]}
        append_and_apply(conn, kind="branch_head_updated", payload=head_payload)
        switch_active_branch(conn, name="early")

        visible_texts = [
            row["text"] for row in _read_recent_dialogue(conn, "c1")
        ]
        assert visible_texts == ["turn 0", "turn 1", "turn 2"]
def test_meanwhile_dialogue_reader_respects_branch(tmp_path):
    """T113: the meanwhile prompt-context reader honours the active branch.

    That reader already filters on ``meanwhile_scene_id``; this test checks
    that the branch clamp is applied on top of that filter.
    """
    from chat.web.meanwhile import _read_recent_meanwhile_dialogue

    db_path = tmp_path / "t.db"
    apply_migrations(db_path)

    with open_db(db_path) as conn:

        def _meanwhile_turn(speaker_id, text):
            # One assistant turn tagged with meanwhile scene 7.
            return append_and_apply(
                conn,
                kind="assistant_turn",
                payload={
                    "chat_id": "c1",
                    "speaker_id": speaker_id,
                    "text": text,
                    "meanwhile_scene_id": 7,
                },
            )

        # A user turn followed by two meanwhile assistant turns, so the
        # id clamp cuts across both kinds.
        u1 = _seed_user_turn(conn, "c1", "u1")
        a1 = _meanwhile_turn("host", "a1")
        # a2 lands past the head chosen below and must be filtered out
        # once ``early`` is active.
        a2 = _meanwhile_turn("guest", "a2")

        branch_from_event(
            conn, name="early", origin_event_id=u1, chat_id="c1"
        )
        head_payload = {"name": "early", "head_event_id": a1}
        append_and_apply(conn, kind="branch_head_updated", payload=head_payload)
        switch_active_branch(conn, name="early")

        rows = _read_recent_meanwhile_dialogue(conn, "c1", scene_id=7)
        seen_texts = [row["text"] for row in rows]
        assert "a1" in seen_texts
        assert "a2" not in seen_texts

        # Keeps the ``a2`` binding for the test narrative while silencing
        # the "unused variable" linter warning.
        _ = a2