From 343f3055871ba0ba8482b6a8b603e56a05957202 Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Sun, 26 Apr 2026 20:18:34 -0400
Subject: [PATCH] feat: significance-driven quote retention + thread emission
 on close (T58)

---
 chat/services/scene_summarize.py | 103 +++++++++++-
 tests/test_per_pov_summary.py    | 260 ++++++++++++++++++++++++++++++-
 2 files changed, 357 insertions(+), 6 deletions(-)

diff --git a/chat/services/scene_summarize.py b/chat/services/scene_summarize.py
index 2e74ddf..fa5958f 100644
--- a/chat/services/scene_summarize.py
+++ b/chat/services/scene_summarize.py
@@ -29,6 +29,8 @@ keeps moving.
 from __future__ import annotations
 
 import json
+import uuid
+from datetime import datetime, timezone
 from sqlite3 import Connection
 
 from pydantic import BaseModel, Field
@@ -167,6 +169,7 @@ async def _summarize_and_apply_for_witness(
     you_name: str,
     dialogue: list[dict],
     timeout_s: float,
+    key_quotes_suffix: str = "",
 ) -> ScenePOVSummary:
     """Run :func:`summarize_scene` for one bot witness and apply the
     three projected updates (memory pov_summary rewrite, edge summary
@@ -175,6 +178,10 @@ async def _summarize_and_apply_for_witness(
     Tolerant of missing pieces in the same way Phase 1 was: no memory
     row -> skip the rewrite; no edge row -> skip the edge_summary write
     (the empty-default classifier output simply yields no rewrites).
+
+    ``key_quotes_suffix`` is appended verbatim to the per-POV summary
+    text before the rewrite lands (T58.1) — empty string is the no-op
+    default for low-significance scenes.
     """
     from chat.state.edges import get_edge
     from chat.state.entities import get_bot
@@ -206,6 +213,7 @@ async def _summarize_and_apply_for_witness(
             # Empty default -> skip the memory rewrite; the seeded
             # per-turn pov_summary stays in place.
             continue
+        new_value = pov.summary + key_quotes_suffix
         append_and_apply(
             conn,
             kind="manual_edit",
@@ -213,7 +221,7 @@ async def _summarize_and_apply_for_witness(
                 "target_kind": "memory_pov_summary",
                 "target_id": int(memory_id),
                 "prior_value": prior_pov,
-                "new_value": pov.summary,
+                "new_value": new_value,
             },
         )
 
@@ -255,6 +263,40 @@ async def _summarize_and_apply_for_witness(
     return pov
 
 
+def _build_key_quotes_suffix(conn: Connection, scene_id: int) -> str:
+    """Return the "Key quotes:" suffix for a scene, or "" when none apply.
+
+    When the scene's max-turn-significance is >= 2, the suffix lists the
+    top-3 highest-significance memory rows (per requirements §11.1),
+    ordered by (significance DESC, id ASC). Otherwise the empty string
+    is returned so low-significance per-POV summaries collapse fully.
+
+    Quote source is each memory's current ``pov_summary``, so this must
+    run BEFORE the per-POV rewrite. Rows with a NULL/blank summary are
+    skipped rather than rendered as ``- ""``, and each quote is cut to
+    200 chars to bound memory row growth across many witnesses.
+    """
+    row = conn.execute(
+        "SELECT MAX(significance) FROM memories WHERE scene_id = ?",
+        (scene_id,),
+    ).fetchone()
+    max_sig = (row[0] if row else None) or 0
+    if max_sig < 2:
+        return ""
+    cur = conn.execute(
+        "SELECT pov_summary FROM memories WHERE scene_id = ? "
+        "AND TRIM(COALESCE(pov_summary, '')) <> '' "
+        "ORDER BY significance DESC, id ASC LIMIT 3",
+        (scene_id,),
+    )
+    # The SQL filter guarantees every row carries non-blank text.
+    quotes = [r[0][:200] for r in cur.fetchall()]
+    if not quotes:
+        return ""
+    lines = "\n".join(f'- "{q}"' for q in quotes)
+    return f"\n\nKey quotes:\n{lines}"
+
+
 async def apply_scene_close_summary(
     conn: Connection,
     client: LLMClient,
@@ -296,8 +338,10 @@ async def apply_scene_close_summary(
     """
     # Local imports to keep the module-level surface tight and avoid
     # any chance of a circular dep through chat.state.*.
+    from chat.services.thread_detection import detect_threads
     from chat.state.entities import get_bot, get_you
     from chat.state.group_node import get_group_node
+    from chat.state.threads import list_open_threads
     from chat.state.world import get_chat
 
     you_entity = get_you(conn) or {"name": "you", "persona": ""}
@@ -308,6 +352,11 @@ async def apply_scene_close_summary(
 
     dialogue = _read_recent_dialogue(conn, chat_id)
 
+    # T58.1: build the "Key quotes:" suffix BEFORE the per-POV rewrites
+    # land — quote source is the raw seeded pov_summary text on each
+    # memory row, which the rewrite about to fire would clobber.
+    key_quotes_suffix = _build_key_quotes_suffix(conn, scene_id)
+
     host_pov = await _summarize_and_apply_for_witness(
         conn,
         client,
@@ -318,6 +367,7 @@ async def apply_scene_close_summary(
         you_name=you_name,
         dialogue=dialogue,
         timeout_s=timeout_s,
+        key_quotes_suffix=key_quotes_suffix,
     )
 
     guest_pov: ScenePOVSummary | None = None
@@ -332,6 +382,7 @@ async def apply_scene_close_summary(
             you_name=you_name,
             dialogue=dialogue,
             timeout_s=timeout_s,
+            key_quotes_suffix=key_quotes_suffix,
         )
 
     # Group node update: T70 runs a third classifier call to merge the
@@ -364,6 +415,56 @@ async def apply_scene_close_summary(
             },
         )
 
+    # T58.2: thread detection on close. Reuses the dialogue we already
+    # gathered for per-POV summarization — same {speaker, text} shape
+    # detect_threads expects. Failure-tolerant: classify() returns the
+    # empty default on retry-exhaustion, and the broad except below
+    # protects the close pipeline from any other classifier/mock flap.
+    try:
+        thread_result = await detect_threads(
+            client,
+            classifier_model=classifier_model,
+            scene_transcript=dialogue,
+            open_threads=list_open_threads(conn, chat_id),
+            timeout_s=timeout_s,
+        )
+    except Exception:
+        from chat.services.thread_detection import ThreadDetectionResult
+
+        thread_result = ThreadDetectionResult()
+    for cand in thread_result.candidates:
+        if cand.action == "open":
+            new_thread_id = f"thr_{uuid.uuid4().hex[:12]}"
+            append_and_apply(
+                conn,
+                kind="thread_opened",
+                payload={
+                    "thread_id": new_thread_id,
+                    "chat_id": chat_id,
+                    "title": cand.title,
+                    "summary": cand.summary,
+                },
+            )
+        elif cand.action == "update" and cand.existing_thread_id:
+            append_and_apply(
+                conn,
+                kind="thread_updated",
+                payload={
+                    "thread_id": cand.existing_thread_id,
+                    "summary": cand.summary,
+                    "last_referenced_scene_id": scene_id,
+                },
+            )
+        elif cand.action == "close" and cand.existing_thread_id:
+            append_and_apply(
+                conn,
+                kind="thread_closed",
+                payload={
+                    "thread_id": cand.existing_thread_id,
+                    "closed_at": datetime.now(timezone.utc).isoformat(),
+                },
+            )
+
     return host_pov
 
 
diff --git a/tests/test_per_pov_summary.py b/tests/test_per_pov_summary.py
index c401ea8..2453b92 100644
--- a/tests/test_per_pov_summary.py
+++ b/tests/test_per_pov_summary.py
@@ -504,13 +504,15 @@ async def test_close_with_no_guest_matches_phase1(tmp_path):
             "relationship_summary": "BotA leaned in supportively.",
         }
     )
+    no_threads = json.dumps({"candidates": []})
     with open_db(db) as conn:
         _seed_single_bot_scene(conn)
         project(conn)
 
-        # canned has 2 entries to detect any over-call; the assertion below
-        # confirms only one was consumed.
-        client = MockLLMClient(canned=[canned, canned])
+        # 1 host-POV entry + 1 thread-detection entry (T58.2) + 1 spare
+        # to detect any over-call. Assertion below confirms exactly two
+        # were consumed.
+        client = MockLLMClient(canned=[canned, no_threads, canned])
         await apply_scene_close_summary(
             conn,
             client,
@@ -520,8 +522,8 @@ async def test_close_with_no_guest_matches_phase1(tmp_path):
             host_bot_id="bot_a",
         )
 
-        # Exactly one classifier call -> exactly one canned entry consumed,
-        # leaving the second untouched.
+        # Host POV + thread detection -> exactly two canned entries
+        # consumed, leaving the spare untouched.
         assert len(client._canned) == 1
 
         # Host memory rewritten with the per-POV summary content.
@@ -845,3 +847,251 @@ async def test_group_summary_skipped_when_no_guest(tmp_path):
             "SELECT 1 FROM event_log WHERE kind = 'group_node_updated'"
         ).fetchall()
         assert rows == []
+
+
+# ---------------------------------------------------------------------------
+# T58: significance-driven quote retention + thread detection on close.
+# ---------------------------------------------------------------------------
+
+
+def _seed_single_bot_scene_no_memory(conn) -> None:
+    """Seed bot, you, chat, container, scene, edge and one turn pair, like
+    ``_seed_single_bot_scene`` minus memory_written (callers seed memories)."""
+    append_event(conn, kind="bot_authored", payload=_bot_payload("bot_a", "BotA"))
+    append_event(
+        conn,
+        kind="you_authored",
+        payload={"name": "Me", "pronouns": "they/them", "persona": "engineer"},
+    )
+    append_event(
+        conn,
+        kind="chat_created",
+        payload={
+            "id": "chat_bot_a",
+            "host_bot_id": "bot_a",
+            "initial_time": "2026-04-26T20:00:00+00:00",
+            "narrative_anchor": "Day 1",
+            "weather": "",
+        },
+    )
+    append_event(
+        conn,
+        kind="container_created",
+        payload={
+            "chat_id": "chat_bot_a",
+            "name": "office",
+            "type": "workplace",
+            "properties": {},
+        },
+    )
+    append_event(
+        conn,
+        kind="scene_opened",
+        payload={
+            "chat_id": "chat_bot_a",
+            "container_id": 1,
+            "started_at": "2026-04-26T20:00:00+00:00",
+            "participants": ["you", "bot_a"],
+        },
+    )
+    append_event(
+        conn,
+        kind="edge_update",
+        payload={
+            "source_id": "bot_a",
+            "target_id": "you",
+            "chat_id": "chat_bot_a",
+        },
+    )
+    append_event(
+        conn,
+        kind="user_turn",
+        payload={
+            "chat_id": "chat_bot_a",
+            "prose": "Quick chat about the deadline",
+            "segments": [],
+        },
+    )
+    append_event(
+        conn,
+        kind="assistant_turn",
+        payload={
+            "chat_id": "chat_bot_a",
+            "speaker_id": "bot_a",
+            "text": "It's going to be okay.",
+            "truncated": False,
+            "user_turn_id": 1,
+        },
+    )
+
+
+def _seed_memory(conn, *, pov_summary: str, significance: int) -> None:
+    """Append one bot_a-owned ``memory_written`` event for scene 1 with the
+    given text and significance (witnessed by you and host, not guest)."""
+    # Fixed ids match the bot_a / chat_bot_a / scene 1 fixture the callers
+    # seed first; only the text and significance vary per memory.
+    witnesses = {"witness_you": 1, "witness_host": 1, "witness_guest": 0}
+    memory = {
+        "owner_id": "bot_a",
+        "chat_id": "chat_bot_a",
+        "scene_id": 1,
+        "pov_summary": pov_summary,
+        **witnesses,
+        "significance": significance,
+    }
+    append_event(conn, kind="memory_written", payload=memory)
+
+
+@pytest.mark.asyncio
+async def test_low_significance_scene_omits_quotes(tmp_path):
+    """A scene whose memories peak below significance 2 is rewritten to the
+    bare classifier summary: no "Key quotes:" suffix on any memory row."""
+    db_path = tmp_path / "t.db"
+    apply_migrations(db_path)
+    host_pov_reply = {
+        "summary": "BotA had a low-key chat with you.",
+        "knowledge_facts": [],
+        "relationship_summary": "Nothing major shifted.",
+    }
+    # Canned replies are consumed in order: host POV, then thread detection.
+    replies = [json.dumps(host_pov_reply), json.dumps({"candidates": []})]
+    with open_db(db_path) as conn:
+        _seed_single_bot_scene_no_memory(conn)
+        for text, sig in (
+            ("Maya rambled about coffee", 1),
+            ("Maya glanced at the clock", 0),
+        ):
+            _seed_memory(conn, pov_summary=text, significance=sig)
+        project(conn)
+
+        await apply_scene_close_summary(
+            conn,
+            MockLLMClient(canned=replies),
+            classifier_model="x",
+            chat_id="chat_bot_a",
+            scene_id=1,
+            host_bot_id="bot_a",
+        )
+
+        query = "SELECT pov_summary FROM memories WHERE scene_id = 1"
+        summaries = [row[0] for row in conn.execute(query).fetchall()]
+        assert summaries
+        for summary in summaries:
+            assert "Key quotes:" not in summary
+            assert "BotA had a low-key chat" in summary
+
+
+@pytest.mark.asyncio
+async def test_high_significance_scene_includes_top_3_quotes(tmp_path):
+    """Once any memory in the scene reaches significance 2, every rewritten
+    per-POV summary ends with a "Key quotes:" block holding the three
+    top-ranked memory texts, ranked by (significance DESC, id ASC)."""
+    db_path = tmp_path / "t.db"
+    apply_migrations(db_path)
+    host_pov_reply = {
+        "summary": "BotA had a heavy talk with you.",
+        "knowledge_facts": [],
+        "relationship_summary": "Things shifted.",
+    }
+    # Canned replies are consumed in order: host POV, then thread detection.
+    replies = [json.dumps(host_pov_reply), json.dumps({"candidates": []})]
+    # Seeded in id order. Ranking by (sig DESC, id ASC) keeps "one" (sig 3),
+    # then "two" and "four" (both sig 2, lower id first); "three" (sig 1)
+    # misses the top-3 cut.
+    seeded = [
+        ("Maya quote one", 3),
+        ("Maya quote two", 2),
+        ("Maya quote three", 1),
+        ("Maya quote four", 2),
+    ]
+    # Quoted forms, as rendered inside the suffix, in expected order.
+    kept = ['"Maya quote one"', '"Maya quote two"', '"Maya quote four"']
+    with open_db(db_path) as conn:
+        _seed_single_bot_scene_no_memory(conn)
+        for text, sig in seeded:
+            _seed_memory(conn, pov_summary=text, significance=sig)
+        project(conn)
+
+        await apply_scene_close_summary(
+            conn,
+            MockLLMClient(canned=replies),
+            classifier_model="x",
+            chat_id="chat_bot_a",
+            scene_id=1,
+            host_bot_id="bot_a",
+        )
+
+        query = "SELECT pov_summary FROM memories WHERE scene_id = 1"
+        summaries = [row[0] for row in conn.execute(query).fetchall()]
+        assert summaries
+        for summary in summaries:
+            assert "Key quotes:" in summary
+            for quoted in kept:
+                assert quoted in summary
+            # The sig-1 quote falls outside the top-3 cap.
+            assert '"Maya quote three"' not in summary
+            # Ordering: sig 3 first, then the two sig-2s by id ASC.
+            at_one, at_two, at_four = (summary.index(q) for q in kept)
+            assert at_one < at_two < at_four
+
+
+@pytest.mark.asyncio
+async def test_thread_detection_emits_events(tmp_path, monkeypatch):
+    """Closing a scene runs ``detect_threads``; an "open" candidate becomes
+    one ``thread_opened`` event carrying a ``thr_``-prefixed thread_id."""
+    from chat.services import thread_detection as td_mod
+
+    host_pov_reply = {
+        "summary": "BotA noticed something unresolved.",
+        "knowledge_facts": [],
+        "relationship_summary": "Tension lingered.",
+    }
+    opened = td_mod.ThreadCandidate(
+        action="open",
+        title="Test thread",
+        summary="A test",
+        existing_thread_id=None,
+    )
+
+    async def fake_detect_threads(client, **kwargs):
+        # Stand-in for the classifier call: always proposes one new thread.
+        return td_mod.ThreadDetectionResult(candidates=[opened])
+
+    # Patch the module attribute: the close path imports detect_threads
+    # from this module at call time, so it picks up the fake.
+    monkeypatch.setattr(td_mod, "detect_threads", fake_detect_threads)
+
+    db_path = tmp_path / "t.db"
+    apply_migrations(db_path)
+    with open_db(db_path) as conn:
+        _seed_single_bot_scene(conn)
+        project(conn)
+
+        # Only the host-POV reply is canned; thread detection is faked.
+        await apply_scene_close_summary(
+            conn,
+            MockLLMClient(canned=[json.dumps(host_pov_reply)]),
+            classifier_model="x",
+            chat_id="chat_bot_a",
+            scene_id=1,
+            host_bot_id="bot_a",
+        )
+
+        # Exactly one thread_opened event must have been logged.
+        events = conn.execute(
+            "SELECT payload_json FROM event_log WHERE kind = 'thread_opened'"
+        ).fetchall()
+        assert len(events) == 1
+        opened_payload = json.loads(events[0][0])
+        assert opened_payload["title"] == "Test thread"
+        assert opened_payload["summary"] == "A test"
+        assert opened_payload["chat_id"] == "chat_bot_a"
+        # The id is uuid-derived, so only its prefix can be pinned.
+        assert opened_payload["thread_id"].startswith("thr_")
+
+        # The threads-table projection ran via append_and_apply.
+        from chat.state.threads import list_open_threads
+
+        still_open = list_open_threads(conn, "chat_bot_a")
+        assert len(still_open) == 1
+        assert still_open[0]["title"] == "Test thread"