feat: branching read-side filter — event readers consult active branch range (T113)

Wire the active branch's [origin_event_id, head_event_id] window into every user-facing event/memory reader so switching branches actually changes what dialogue and memories the user sees. Phase 4 T89/T94 shipped branches as metadata-only — this closes the loop.

Helper:
- chat/state/branches.py: add `active_branch_event_ids(conn)` returning the active branch's id range, with two defensive fall-throughs to `(0, BIG_INT)` (where `BIG_INT` is the `_NO_HEAD_CLAMP = 2**63 - 1` sentinel): (a) no active branch row at all, and (b) the bootstrap "main" sentinel (name="main", origin=0, head=0). Production never bumps main's head_event_id today, so this preserves existing reader behaviour for every test that doesn't explicitly switch.

Readers updated (all user-facing dialogue / retrieval surfaces):
- chat/services/turn_common.py::read_recent_dialogue — chat-history prompt context + the chat-view template path (via web/turns.py + web/chat.py).
- chat/services/scene_summarize.py::_read_recent_dialogue — scene-close per-POV summary input.
- chat/state/memory.py::search_memories — FTS leg filters via m.event_id (T109's column); legacy NULL event_id rows are *included* unconditionally so the filter doesn't break pre-0014 retrieval. The fused (FTS + RRF + vector) path also drops vector hits whose event_id falls outside the branch window.
- chat/web/meanwhile.py::_read_recent_meanwhile_dialogue — meanwhile prompt context.

Projector queries (chat/state/world.py et al.) and admin/management surfaces (drawer hide-panel, cross-chat search, regenerate's row lookups by id) are intentionally NOT branch-filtered: projection must see the full log to build state correctly, and the admin surfaces operate across branches by design.

Tests (10 new, 446 total):
- tests/test_branches_state.py: 3 tests for `active_branch_event_ids` itself (bootstrap-main, no-active-branch, non-main literal range).
- tests/test_branching.py: 7 cross-feature tests covering the spec's five required scenarios plus scene_summarize and meanwhile readers.
2026-04-27 06:25:22 -04:00
parent 757abf24f8
commit 456f50d334
7 changed files with 484 additions and 8 deletions
@@ -144,23 +144,36 @@ def _read_recent_dialogue(
    ``id >= since_event_id`` so callers needing a scene-scoped view (e.g.
    thread detection on close) don't pull turns that landed before the
    closing scene's ``scene_opened`` event.
+
+    T113: also clamps by the active branch's ``[origin, head]`` event-id
+    range so scene-summary inputs respect the user's current branch.
+    Bootstrap-main and "no active branch" fall through to ``(0, BIG_INT)``
+    so existing flows are unchanged.
    """
+    from chat.state.branches import active_branch_event_ids
+
+    origin, head = active_branch_event_ids(conn)
    if since_event_id is None:
        cur = conn.execute(
            "SELECT kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'assistant_turn') "
            "  AND superseded_by IS NULL AND hidden = 0 "
+            "  AND id BETWEEN ? AND ? "
            "ORDER BY id DESC LIMIT ?",
-            (limit,),
+            (origin, head, limit),
        )
    else:
+        # Compose ``since_event_id`` with the branch lower bound — readers
+        # want the tightest ``id >= max(since, origin)`` clamp without an
+        # extra Python pass.
+        lower = max(origin, since_event_id)
        cur = conn.execute(
            "SELECT kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'assistant_turn') "
            "  AND superseded_by IS NULL AND hidden = 0 "
-            "  AND id >= ? "
+            "  AND id BETWEEN ? AND ? "
            "ORDER BY id DESC LIMIT ?",
-            (since_event_id, limit),
+            (lower, head, limit),
        )
    rows = list(reversed(cur.fetchall()))
    out: list[dict] = []
@@ -30,6 +30,7 @@ from __future__ import annotations
 import json
 from sqlite3 import Connection

+from chat.state.branches import active_branch_event_ids
 from chat.state.edges import get_edge


@@ -60,15 +61,22 @@ def read_recent_dialogue(
    previous implementation filtered chat_id post-fetch in Python, which
    let foreign-chat rows fill the LIMIT and yield fewer than N relevant
    rows in busy multi-chat databases.
+
+    T113: clamp by the active branch's ``[origin, head]`` event-id range so
+    switching branches actually changes what dialogue this read sees.
+    Bootstrap-main and "no active branch" both fall through to ``(0,
+    BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
    """
+    origin, head = active_branch_event_ids(conn)
    if exclude_event_id is None:
        cur = conn.execute(
            "SELECT id, kind, payload_json FROM event_log "
            "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
            "  AND superseded_by IS NULL AND hidden = 0 "
+            "  AND id BETWEEN ? AND ? "
            "  AND json_extract(payload_json, '$.chat_id') = ? "
            "ORDER BY id DESC LIMIT ?",
-            (chat_id, limit),
+            (origin, head, chat_id, limit),
        )
    else:
        cur = conn.execute(
@@ -76,9 +84,10 @@ def read_recent_dialogue(
            "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
            "  AND id != ? "
            "  AND superseded_by IS NULL AND hidden = 0 "
+            "  AND id BETWEEN ? AND ? "
            "  AND json_extract(payload_json, '$.chat_id') = ? "
            "ORDER BY id DESC LIMIT ?",
-            (exclude_event_id, chat_id, limit),
+            (exclude_event_id, origin, head, chat_id, limit),
        )
    rows = list(reversed(cur.fetchall()))
    out: list[dict] = []
@@ -157,8 +157,58 @@ def active_branch(conn: Connection) -> dict | None:
    }


+# T113: "no upper bound" sentinel that ``active_branch_event_ids`` returns as
+# the head when there is no active branch row or the active branch is the
+# bootstrap "main" row (origin=0, head=0). Readers compose ``id BETWEEN origin
+# AND head``, so SQLite's largest signed 64-bit integer (``2**63 - 1``, no row
+# id can exceed it) acts as "no clamp" without needing a separate code path.
+_NO_HEAD_CLAMP = 2**63 - 1
+
+
+def active_branch_event_ids(conn: Connection) -> tuple[int, int]:
+    """Return the active branch's ``(origin_event_id, head_event_id)`` bounds.
+
+    Callers use the pair as the bounds of an ``event_log.id BETWEEN ? AND ?``
+    clamp on user-facing reads (T113).
+
+    Defensive defaults:
+
+    * **No active branch row** (``active_branch`` returns ``None``) — return
+      ``(0, _NO_HEAD_CLAMP)`` so readers see all events. This preserves the
+      Phase 4 "branches are metadata-only" contract for any code path that
+      somehow runs without the migration-0013 bootstrap.
+    * **Bootstrap "main"** — the canonical ``name="main", origin=0, head=0``
+      row inserted by migration 0013. Production today never emits
+      ``branch_head_updated`` for main, so head stays at 0 even as events
+      accumulate; this exact state returns ``(0, _NO_HEAD_CLAMP)`` so all
+      events stay visible, which existing branch-unaware tests rely on.
+    * **Any other branch** (including "main" once either id is non-zero) —
+      return the literal ``(origin, head)`` from the branch row. A branch
+      created at origin=N has head=N initially (per ``branch_from_event``),
+      so ``BETWEEN N AND N`` returns just that one seed event until the
+      head is bumped via ``branch_head_updated``.
+
+    Schema note: the T113 spec describes ``head_event_id`` as nullable, but
+    migration 0013 declared it ``NOT NULL DEFAULT 0``, so head=0 on bootstrap
+    main is read as the "unset" sentinel. Non-main branches never reach head=0
+    in normal flow (creation sets head=origin, and origin=0 only for main).
+    """
+    branch = active_branch(conn)
+    if branch is None:
+        return (0, _NO_HEAD_CLAMP)
+    # ``or 0`` maps a missing or NULL id to 0 before the ``int()`` coercion.
+    origin = int(branch.get("origin_event_id") or 0)
+    head = int(branch.get("head_event_id") or 0)
+    # Bootstrap "main" sentinel: match on name AND both ids being 0 so a
+    # future branch that legitimately starts at origin=0 is not mis-detected.
+    if branch.get("name") == "main" and origin == 0 and head == 0:
+        return (0, _NO_HEAD_CLAMP)
+    return (origin, head)
+
+
 __all__ = [
    "get_branch",
    "list_branches",
    "active_branch",
+    "active_branch_event_ids",
 ]
@@ -213,12 +213,20 @@ def search_memories(
    # channel) so memories that are weak in FTS but strong in vector — and
    # vice versa — make it into the merge pool.
    over_fetch = max(k * 2, 20) if query_vector is not None else max(k * 4, 20)
+    # T113: branch-scope filter on ``m.event_id`` (T109's column). Memories
+    # whose ``event_id`` is NULL — projected before the 0014 schema migration
+    # ran — are *included* unconditionally so the branch filter never breaks
+    # legacy retrieval. Newer rows respect the active branch's bounds.
+    from chat.state.branches import active_branch_event_ids
+
+    origin, head = active_branch_event_ids(conn)
    sql = (
        f"SELECT {select_list}, memories_fts.rank AS fts_rank "
        "FROM memories_fts "
        "JOIN memories m ON m.id = memories_fts.rowid "
        f"WHERE m.owner_id = ? AND m.{witness_col} = 1 "
        "AND memories_fts MATCH ? "
+        "AND (m.event_id IS NULL OR m.event_id BETWEEN ? AND ?) "
        # T57: significance multiplier biases the FTS over-fetch order. BM25
        # ``rank`` is lower-is-better, so subtracting ``significance * BIAS``
        # surfaces higher-significance rows above lower-significance rows with
@@ -227,7 +235,10 @@ def search_memories(
        "ORDER BY (memories_fts.rank - m.significance * ?) ASC "
        "LIMIT ?"
    )
-    cur = conn.execute(sql, (owner_id, query, SIGNIFICANCE_RANK_BIAS, over_fetch))
+    cur = conn.execute(
+        sql,
+        (owner_id, query, origin, head, SIGNIFICANCE_RANK_BIAS, over_fetch),
+    )
    rows = cur.fetchall()

    # FTS-only path: preserve pre-T96 behaviour exactly.
@@ -331,6 +342,28 @@ def _rrf_fuse_and_rerank(
        query_vector=query_vector,
        k=vec_over_fetch,
    )
+    # T113: drop vector hits that fall outside the active branch's event-id
+    # range. ``vector_search`` is a generic service used elsewhere; the
+    # branch filter applied to the FTS leg also has to apply here so the
+    # fused result respects the same scope. Memories with NULL event_id
+    # (legacy rows projected before T109's 0014 schema migration) are
+    # included unconditionally — same policy as the FTS leg.
+    from chat.state.branches import _NO_HEAD_CLAMP, active_branch_event_ids
+
+    vec_origin, vec_head = active_branch_event_ids(conn)
+    if vec_hits and (vec_origin > 0 or vec_head < _NO_HEAD_CLAMP):
+        vec_ids = [h["memory_id"] for h in vec_hits]
+        placeholders_v = ",".join("?" * len(vec_ids))
+        in_range = {
+            row[0]
+            for row in conn.execute(
+                f"SELECT id FROM memories "
+                f"WHERE id IN ({placeholders_v}) "
+                f"  AND (event_id IS NULL OR event_id BETWEEN ? AND ?)",
+                (*vec_ids, vec_origin, vec_head),
+            ).fetchall()
+        }
+        vec_hits = [h for h in vec_hits if h["memory_id"] in in_range]
    vec_rank_by_id: dict[int, int] = {
        hit["memory_id"]: rank for rank, hit in enumerate(vec_hits)
    }
@@ -71,18 +71,27 @@ def _read_recent_meanwhile_dialogue(
    that already match — avoids an unbounded scan as ``event_log``
    grows. The user-side rows match on chat_id only since they aren't
    tagged with a scene id (they ride the chat-wide log).
+
+    T113: clamp by the active branch's ``[origin, head]`` event-id range
+    so meanwhile prompt context respects the user's current branch.
+    Bootstrap-main and "no active branch" both fall through to ``(0,
+    BIG_INT)`` — no functional change for the metadata-only Phase 4 era.
    """
+    from chat.state.branches import active_branch_event_ids
+
+    origin, head = active_branch_event_ids(conn)
    cur = conn.execute(
        "SELECT id, kind, payload_json FROM event_log "
        "WHERE kind IN ('user_turn', 'user_turn_edit', 'assistant_turn') "
        "  AND superseded_by IS NULL AND hidden = 0 "
+        "  AND id BETWEEN ? AND ? "
        "  AND json_extract(payload_json, '$.chat_id') = ? "
        "  AND ("
        "    kind IN ('user_turn', 'user_turn_edit') "
        "    OR json_extract(payload_json, '$.meanwhile_scene_id') = ?"
        "  ) "
        "ORDER BY id DESC LIMIT ?",
-        (chat_id, scene_id, limit),
+        (origin, head, chat_id, scene_id, limit),
    )
    rows = cur.fetchall()
    rows.reverse()