merge: T71 prompt.py polish (witness role + ACTIVITIES + NICE trim docs)

This commit is contained in:
Joseph Doherty
2026-04-26 17:18:02 -04:00
2 changed files with 397 additions and 41 deletions
+130 -41
View File
@@ -273,6 +273,18 @@ def _resolve_previous_scene_summary(
return mem[0]
def _witness_role_for(speaker_bot_id: str, host_bot_id: str | None) -> str:
"""Return the witness POV role for the speaker's memory query.
The host bot of a chat queries memories with ``witness_role="host"``;
the guest bot queries with ``witness_role="guest"``. Phase 2 T46
pinned the contract on ``search_memories``; this helper applies it
at the call site so a guest-as-speaker doesn't silently retrieve
memories under the wrong POV mask.
"""
return "host" if speaker_bot_id == host_bot_id else "guest"
def _resolve_addressee(
conn: Connection, addressee: str, you: dict | None
) -> tuple[str, str]:
@@ -356,34 +368,57 @@ def assemble_narrative_prompt(
addressee_name,
)
# Activity for present entities. Core (MUST): you + speaker bot.
# Phase 2 (SHOULD-tier): when a third party (guest) is present in
# the chat, append their activity in a separate block so it can be
# trimmed independently under tight budget.
activities: list[dict] = []
# Activity for present entities — single ACTIVITIES: block with up
# to three bullets (you, speaker, guest). The block itself is
# MUST-tier and survives all trims, but bullet-level trim drops
# bullets in the order guest -> you, keeping the speaker bullet
# (the speaker's own current activity is the load-bearing slice).
#
# T71.2 chose Option B from the polish plan: pre-truncate the
# bullets list at trim time before _build_activity_block runs,
# rather than introducing a granular tier mode in the trim
# machinery. The single-block render avoids the dual-ACTIVITIES:
# header that Phase 2 T43 introduced (read by some LLMs as a
# duplicate-section bug).
you_activity: dict | None = None
you_act = get_activity(conn, "you")
if you_act is not None:
you_act = dict(you_act)
you_act["_display_name"] = (you or {}).get("name") or "you"
activities.append(you_act)
you_activity = dict(you_act)
you_activity["_display_name"] = (you or {}).get("name") or "you"
speaker_activity: dict | None = None
bot_act = get_activity(conn, speaker_bot_id)
if bot_act is not None:
bot_act = dict(bot_act)
bot_act["_display_name"] = bot["name"]
activities.append(bot_act)
activity_block = _build_activity_block(activities)
speaker_activity = dict(bot_act)
speaker_activity["_display_name"] = bot["name"]
# SHOULD-tier guest activity extension (Phase 2 / Task 43).
guest_activity_block: str | None = None
guest_activity: dict | None = None
if guest_id is not None:
guest_act = get_activity(conn, guest_id)
if guest_act is not None:
guest_act = dict(guest_act)
guest_activity = dict(guest_act)
guest_bot = get_bot(conn, guest_id)
guest_act["_display_name"] = (
guest_activity["_display_name"] = (
guest_bot["name"] if guest_bot else guest_id
)
guest_activity_block = _build_activity_block([guest_act])
def _activity_block_for(
    *, include_you: bool, include_guest: bool
) -> str | None:
    """Render the single ACTIVITIES: block from the selected bullets.

    Bullets are emitted in a fixed order: you, speaker, guest. The
    speaker bullet has no toggle -- it is included whenever a speaker
    activity row was found -- while the ``you`` and ``guest`` bullets
    are switched on or off by the caller during trim. A bullet whose
    activity row is ``None`` is skipped regardless of its toggle.

    The selected rows are handed to ``_build_activity_block``; the
    ``str | None`` return is whatever that helper produces (presumably
    ``None`` when no bullets remain -- confirm against the helper).
    """
    # (wanted, row) pairs in render order; speaker is always wanted.
    candidates = (
        (include_you, you_activity),
        (True, speaker_activity),
        (include_guest, guest_activity),
    )
    selected: list[dict] = [
        row for wanted, row in candidates if wanted and row is not None
    ]
    return _build_activity_block(selected)
# SHOULD-tier group-node block (Phase 2 / Task 43): rendered only
# when the group_node row is present AND it covers all three of
@@ -433,7 +468,12 @@ def assemble_narrative_prompt(
memory_summaries = []
if query:
try:
hits = search_memories(conn, speaker_bot_id, "host", query, k=4)
witness_role = _witness_role_for(
speaker_bot_id, chat.get("host_bot_id")
)
hits = search_memories(
conn, speaker_bot_id, witness_role, query, k=4
)
memory_summaries = [h["pov_summary"] for h in hits]
except Exception:
memory_summaries = []
@@ -452,11 +492,18 @@ def assemble_narrative_prompt(
last4 = dialogue_full[-4:] if dialogue_full else []
must_dialogue_block = _build_dialogue_block(last4, earlier_summary=None)
# MUST-tier ACTIVITIES floor: the speaker bullet alone (you and
# guest bullets are dropped first under bullet-level trim before
# the block bottoms out at speaker-only).
must_activity_block = _activity_block_for(
include_you=False, include_guest=False
)
must_blocks: list[str | None] = [
speaker_identity,
edge_to_addressee,
scene_block,
activity_block,
must_activity_block,
must_dialogue_block,
closing,
]
@@ -481,6 +528,7 @@ def assemble_narrative_prompt(
include_previous_scene: bool,
include_memories_top_k: int,
dialogue_keep: int,
include_you_activity: bool = True,
include_guest_activity: bool = True,
include_group_node: bool = True,
) -> tuple[str, int, list[dict]]:
@@ -503,13 +551,20 @@ def assemble_narrative_prompt(
if include_previous_scene else None
)
# Single ACTIVITIES: block, bullet-level trim (T71.2). Guest
# bullet drops first, then the you bullet; speaker bullet is the
# MUST-tier floor and always present when an activity row exists.
activity_block = _activity_block_for(
include_you=include_you_activity,
include_guest=include_guest_activity,
)
body = _join_blocks([
speaker_identity,
edge_to_addressee,
other_edges_block if include_other_edges else None,
scene_block,
activity_block,
guest_activity_block if include_guest_activity else None,
group_node_block if include_group_node else None,
prev_block,
memories_block,
@@ -527,16 +582,18 @@ def assemble_narrative_prompt(
nice_memories_k = min(4, len(memory_summaries))
include_prev = previous_scene_summary is not None
include_other = other_edges_block is not None
include_guest_activity = guest_activity_block is not None
include_you_activity = you_activity is not None
include_guest_activity = guest_activity is not None
include_group_node = group_node_block is not None
def _build(*, prev: bool, mem_k: int, dlg: int, other: bool,
guest_act: bool, group: bool) -> tuple[str, int]:
you_act: bool, guest_act: bool, group: bool) -> tuple[str, int]:
body, total, _ = assemble(
include_other_edges=other,
include_previous_scene=prev,
include_memories_top_k=mem_k,
dialogue_keep=dlg,
include_you_activity=you_act,
include_guest_activity=guest_act,
include_group_node=group,
)
@@ -544,8 +601,8 @@ def assemble_narrative_prompt(
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
# If under soft, we're done.
@@ -554,12 +611,32 @@ def assemble_narrative_prompt(
# Drop NICE in order: previous scene → memories beyond top-2 →
# older dialogue turns (collapse to 4).
#
# T71.3 — order rationale: the §6.3 spec lists NICE-tier members
# with previous-scene LAST, which read as a literal trim order
# during T18 review. We deliberately keep the greedy order shown
# here (previous-scene FIRST) for two reasons:
#
# 1. Cheapest-impact-first: a per-POV previous-scene summary is
# a single short paragraph that loses very little narrative
# continuity when dropped, while the older dialogue turns it
# is competing with carry the speaker's last few beats — those
# ground the next response far more concretely.
# 2. Greedy lookahead is more expensive than the marginal
# narrative loss. Dropping previous-scene typically clears
# the soft-budget slack in one step; trying memories or
# dialogue first would routinely require multiple recompute
# passes through the assembler.
#
# The pin test test_nice_trim_order_documented locks this order so
# a future refactor can't quietly invert it without surfacing the
# decision.
if include_prev:
include_prev = False
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if total <= budget_soft:
return _emit(body, user_turn_prose)
@@ -568,8 +645,8 @@ def assemble_narrative_prompt(
nice_memories_k = 2
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if total <= budget_soft:
return _emit(body, user_turn_prose)
@@ -578,8 +655,8 @@ def assemble_narrative_prompt(
nice_dialogue_keep = baseline_keep
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if total <= budget_soft:
return _emit(body, user_turn_prose)
@@ -589,35 +666,47 @@ def assemble_narrative_prompt(
nice_memories_k = max(0, nice_memories_k - 1)
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
# Drop SHOULD-tier blocks in order: guest activity → group node →
# other edges. (Guest activity goes first per Task 43 spec — it's
# the most expendable additive context.)
# Drop SHOULD-tier extras in order:
# 1. guest activity bullet (T71.2: bullet-level trim within the
# single ACTIVITIES: block — guest goes first per Task 43 spec)
# 2. group node block
# 3. you activity bullet (still SHOULD-tier; speaker bullet is the
# MUST-tier floor and never dropped)
# 4. other edges
if include_guest_activity and total > budget_hard:
include_guest_activity = False
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if include_group_node and total > budget_hard:
include_group_node = False
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if include_you_activity and total > budget_hard:
include_you_activity = False
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if include_other and total > budget_hard:
include_other = False
body, total = _build(
prev=include_prev, mem_k=nice_memories_k, dlg=nice_dialogue_keep,
other=include_other, guest_act=include_guest_activity,
group=include_group_node,
other=include_other, you_act=include_you_activity,
guest_act=include_guest_activity, group=include_group_node,
)
if total > budget_hard:
+267
View File
@@ -452,6 +452,273 @@ def test_assemble_when_speaker_is_guest_orients_edges_correctly(tmp_path):
assert "68/100" in body
def test_speaker_is_guest_uses_guest_witness_role(tmp_path, monkeypatch):
    """T71.1: a guest speaker queries memories with ``witness_role="guest"``.

    Replaces ``search_memories`` inside the prompt module with a fake
    that records its arguments, assembles a prompt with the guest bot as
    speaker, and checks the recorded owner id and witness role. This
    pins the parametric witness role at the prompt call site (it used to
    be hard-coded to ``"host"``).
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    captured: dict = {}

    def _fake_search(conn, owner_id, witness_role, query, k=4):
        captured["owner_id"] = owner_id
        captured["witness_role"] = witness_role
        captured["query"] = query
        return []

    # Patch the reference imported inside the prompt module so the
    # production call site uses the fake.
    import chat.services.prompt as prompt_mod

    monkeypatch.setattr(prompt_mod, "search_memories", _fake_search)

    with open_db(db) as conn:
        _seed_with_guest(conn)
        assemble_narrative_prompt(
            conn,
            chat_id="chat_bot_a",
            speaker_bot_id="bot_b",  # guest as speaker
            recent_dialogue=[],
            # retrieved_memory_summaries=None forces the search path.
            retrieved_memory_summaries=None,
        )

    # The production call site wraps search_memories in a broad
    # ``except Exception`` and only calls it for a non-empty query, so a
    # skipped or failing call leaves ``captured`` empty. Fail with a
    # clear message instead of a bare KeyError on the lookups below.
    assert captured, (
        "search_memories was never reached (or raised before recording); "
        "the memory-search path did not run"
    )
    assert captured["owner_id"] == "bot_b"
    assert captured["witness_role"] == "guest"
def test_speaker_is_host_uses_host_witness_role(tmp_path, monkeypatch):
    """T71.1 (regression): a host speaker still queries as ``"host"``.

    Replaces ``search_memories`` inside the prompt module with a fake
    that records the witness role, assembles a prompt with the host bot
    as speaker, and checks the recorded role.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    captured: dict = {}

    def _fake_search(conn, owner_id, witness_role, query, k=4):
        captured["witness_role"] = witness_role
        return []

    import chat.services.prompt as prompt_mod

    monkeypatch.setattr(prompt_mod, "search_memories", _fake_search)

    with open_db(db) as conn:
        _seed_with_guest(conn)
        assemble_narrative_prompt(
            conn,
            chat_id="chat_bot_a",
            speaker_bot_id="bot_a",  # host as speaker
            recent_dialogue=[],
            retrieved_memory_summaries=None,
        )

    # The production call site swallows exceptions from search_memories
    # and skips the call for an empty query; surface that case with a
    # readable failure rather than a KeyError.
    assert "witness_role" in captured, (
        "search_memories was never reached (or raised before recording); "
        "the memory-search path did not run"
    )
    assert captured["witness_role"] == "host"
def test_single_activities_block_with_three_bullets_when_3_entities(tmp_path):
    """T71.2: you + host + guest render under exactly one ACTIVITIES: header.

    Seeds a chat with a guest, assembles the prompt with the host as
    speaker, and checks that the body has a single ``ACTIVITIES:``
    header (no duplicate from the earlier dual-block rendering) plus a
    bullet for each of the three entities.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    with open_db(db) as conn:
        _seed_with_guest(conn)
        messages = assemble_narrative_prompt(
            conn,
            chat_id="chat_bot_a",
            speaker_bot_id="bot_a",
            recent_dialogue=[],
            retrieved_memory_summaries=[],
        )

    body = messages[0].content

    # Exactly one ACTIVITIES: header.
    assert body.count("ACTIVITIES:") == 1

    # One unique action verb per entity, taken from the seed data:
    # you (Sam), speaker/host (Aria), guest (Iris).
    expected_verbs = (
        "finishing emails",        # you bullet
        "pretending to work",      # speaker (host) bullet
        "smirking-distinctively",  # guest bullet
    )
    for verb in expected_verbs:
        assert verb in body
def test_tight_budget_drops_guest_activity_bullet_first(tmp_path):
    """T71.2: under a tight budget the guest bullet is the first to go.

    The ACTIVITIES: block itself stays in the prompt and keeps the
    speaker bullet; only the guest bullet is expected to disappear at
    the budgets used here (soft=250, hard=340).
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    turns = (
        ("you", "line-16 hi there"),
        ("bot_a", "line-17 hey"),
        ("you", "line-18 quiet night"),
        ("bot_a", "line-19 indeed"),
    )
    dialogue = [{"speaker": who, "text": said} for who, said in turns]

    with open_db(db) as conn:
        _seed_with_guest(conn)
        messages = assemble_narrative_prompt(
            conn,
            chat_id="chat_bot_a",
            speaker_bot_id="bot_a",
            recent_dialogue=dialogue,
            retrieved_memory_summaries=[],
            budget_soft=250,
            budget_hard=340,
        )

    body = messages[0].content

    # Speaker bullet survives (MUST-tier floor) and the header stays.
    assert "pretending to work" in body
    assert "ACTIVITIES:" in body
    # Guest bullet is dropped first under budget pressure.
    assert "smirking-distinctively" not in body
def test_nice_trim_order_documented(tmp_path):
    """T71.3: pin the NICE-tier trim order against silent inversion.

    Expected drop order under NICE pressure:

    1. previous-scene summary (dropped FIRST)
    2. memories beyond top-2
    3. older dialogue turns (collapsed to last-4)

    The budgets are sized so the all-NICE configuration exceeds the soft
    budget while dropping ONLY the previous-scene summary fits again.
    The pinned observation: previous-scene gone, memories and older
    dialogue intact.
    """
    db = tmp_path / "t.db"
    apply_migrations(db)

    # Heavy previous-scene summary: large enough that dropping it alone
    # clears the soft-budget overage.
    prev_scene_blob = "PREVSCENE-MARKER " + ("filler " * 200)

    # Event log as (kind, payload) pairs, in append order.
    events = [
        ("bot_authored", {
            "id": "bot_a",
            "name": "Aria",
            "persona": "reserved coworker who notices things",
            "voice_samples": ["I — sorry, I didn't mean to."],
            "traits": ["introverted"],
            "backstory": "An archivist who joined the firm last spring.",
            "initial_relationship_to_you": "coworker",
            "kickoff_prose": "you stay late at the office",
        }),
        ("you_authored", {
            "name": "Sam",
            "pronouns": "they/them",
            "persona": "tired analyst",
        }),
        ("chat_created", {
            "id": "chat_bot_a",
            "host_bot_id": "bot_a",
            "guest_bot_id": None,
            "initial_time": "2026-04-26T20:00:00+00:00",
            "narrative_anchor": "Day 1 evening",
            "weather": "clear",
        }),
        ("container_created", {
            "chat_id": "chat_bot_a",
            "name": "office bullpen",
            "type": "workplace",
            "properties": {
                "public": False,
                "moving": False,
                "audible_range": "room",
            },
        }),
        ("edge_update", {
            "source_id": "bot_a",
            "target_id": "you",
            "affinity_delta": 12,
            "trust_delta": 5,
            "knowledge_facts": ["they work on the same floor"],
        }),
        ("activity_change", {
            "entity_id": "you",
            "container_id": 1,
            "posture": "sitting at your desk",
            "action": {"verb": "finishing emails"},
            "attention": "the screen",
        }),
        ("activity_change", {
            "entity_id": "bot_a",
            "container_id": 1,
            "posture": "sitting at her desk",
            "action": {"verb": "pretending to work"},
            "attention": "you, in glances",
        }),
        ("scene_opened", {
            "chat_id": "chat_bot_a",
            "container_id": 1,
            "started_at": "2026-04-26T20:00:00+00:00",
            "participants": ["you", "bot_a"],
        }),
        # Close the seeded scene and write a per-POV summary memory so
        # _resolve_previous_scene_summary returns a non-empty string.
        ("scene_closed", {
            "scene_id": 1,
            "ended_at": "2026-04-26T20:30:00+00:00",
            "significance": 2,
        }),
        ("memory_written", {
            "owner_id": "bot_a",
            "chat_id": "chat_bot_a",
            "scene_id": 1,
            "pov_summary": prev_scene_blob,
            "witness_you": 1,
            "witness_host": 1,
            "witness_guest": 0,
            "source": "direct",
            "reliability": 1.0,
            "significance": 2,
        }),
    ]

    # Six dialogue turns: the last 4 plus 2 older ones. If the dialogue
    # trim fired, the DLG-OLD markers would disappear.
    turns = (
        ("you", "DLG-OLD-00 hello"),
        ("bot_a", "DLG-OLD-01 hi"),
        ("you", "DLG-LAST-16 ok"),
        ("bot_a", "DLG-LAST-17 sure"),
        ("you", "DLG-LAST-18 night"),
        ("bot_a", "DLG-LAST-19 indeed"),
    )
    dialogue = [{"speaker": who, "text": said} for who, said in turns]

    # Four small memories: if the "beyond top-2" trim fired, MEM-C and
    # MEM-D would disappear.
    memories = ["MEM-A short", "MEM-B short", "MEM-C short", "MEM-D short"]

    with open_db(db) as conn:
        from chat.eventlog.log import append_event as _append

        # Append everything first and project once at the end (the
        # original author notes that project replays every event in the
        # log and is not idempotent).
        for kind, payload in events:
            _append(conn, kind=kind, payload=payload)
        project(conn)

        # Soft is tuned so the all-NICE config overflows but dropping
        # just previous-scene fits; hard is set high so SHOULD-tier
        # trimming never triggers.
        messages = assemble_narrative_prompt(
            conn,
            chat_id="chat_bot_a",
            speaker_bot_id="bot_a",
            recent_dialogue=dialogue,
            retrieved_memory_summaries=memories,
            budget_soft=400,
            budget_hard=8000,
        )

    body = messages[0].content

    # Previous-scene summary was the FIRST NICE drop.
    assert "PREVSCENE-MARKER" not in body

    # All four memories stayed (memories trim did NOT fire).
    for marker in ("MEM-A", "MEM-B", "MEM-C", "MEM-D"):
        assert marker in body

    # Older dialogue turns stayed (dialogue trim did NOT fire), and the
    # most recent turn is of course present.
    for marker in ("DLG-OLD-00", "DLG-OLD-01", "DLG-LAST-19"):
        assert marker in body
def test_assemble_with_tight_budget_drops_guest_activity_first(tmp_path):
"""Under tight budget MUST blocks survive but SHOULD-tier guest
activity is dropped first."""